mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / server-tools / instance-manager / guardian.cc
blob4ae9d824e5be1544457dddf9839108f26669f4c7
/* Copyright (c) 2004-2007 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
17 #if defined(__GNUC__) && defined(USE_PRAGMA_IMPLEMENTATION)
18 #pragma implementation
19 #endif
21 #include "guardian.h"
22 #include <string.h>
23 #include <sys/types.h>
24 #include <signal.h>
26 #include "instance.h"
27 #include "instance_map.h"
28 #include "log.h"
29 #include "mysql_manager_error.h"
30 #include "options.h"
/*************************************************************************
  {{{ Constructor & destructor.
*************************************************************************/
37 /**
38 Guardian constructor.
40 SYNOPSIS
41 Guardian()
42 thread_registry_arg
43 instance_map_arg
45 DESCRIPTION
46 Nominal contructor intended for assigning references and initialize
47 trivial objects. Real initialization is made by init() method.
50 Guardian::Guardian(Thread_registry *thread_registry_arg,
51 Instance_map *instance_map_arg)
52 :shutdown_requested(FALSE),
53 stopped(FALSE),
54 thread_registry(thread_registry_arg),
55 instance_map(instance_map_arg)
57 pthread_mutex_init(&LOCK_guardian, 0);
58 pthread_cond_init(&COND_guardian, 0);
62 Guardian::~Guardian()
65 NOTE: it's necessary to synchronize here, because Guiardian thread can be
66 still alive an hold the mutex (because it is detached and we have no
67 control over it).
70 lock();
71 unlock();
73 pthread_mutex_destroy(&LOCK_guardian);
74 pthread_cond_destroy(&COND_guardian);
/*************************************************************************
  }}}
*************************************************************************/
82 /**
83 Send request to stop Guardian.
85 SYNOPSIS
86 request_shutdown()
89 void Guardian::request_shutdown()
91 stop_instances();
93 lock();
94 shutdown_requested= TRUE;
95 unlock();
97 ping();
102 Process an instance.
104 SYNOPSIS
105 process_instance()
106 instance a pointer to the instance for processing
108 MT-NOTE:
109 - the given instance must be locked before calling this operation;
110 - Guardian must be locked before calling this operation.
113 void Guardian::process_instance(Instance *instance)
115 int restart_retry= 100;
116 time_t current_time= time(NULL);
118 if (instance->get_state() == Instance::STOPPING)
120 /* This brach is executed during shutdown. */
122 /* This returns TRUE if and only if an instance was stopped for sure. */
123 if (instance->is_crashed())
125 log_info("Guardian: '%s' stopped.",
126 (const char *) instance->get_name()->str);
128 instance->set_state(Instance::STOPPED);
130 else if ((uint) (current_time - instance->last_checked) >=
131 instance->options.get_shutdown_delay())
133 log_info("Guardian: '%s' hasn't stopped within %d secs.",
134 (const char *) instance->get_name()->str,
135 (int) instance->options.get_shutdown_delay());
137 instance->kill_mysqld(SIGKILL);
139 log_info("Guardian: pretend that '%s' is killed.",
140 (const char *) instance->get_name()->str);
142 instance->set_state(Instance::STOPPED);
144 else
146 log_info("Guardian: waiting for '%s' to stop (%d secs left).",
147 (const char *) instance->get_name()->str,
148 (int) (instance->options.get_shutdown_delay() -
149 current_time + instance->last_checked));
152 return;
155 if (instance->is_mysqld_running())
157 /* The instance can be contacted on it's port */
159 /* If STARTING also check that pidfile has been created */
160 if (instance->get_state() == Instance::STARTING &&
161 instance->options.load_pid() == 0)
163 /* Pid file not created yet, don't go to STARTED state yet */
165 else if (instance->get_state() != Instance::STARTED)
167 /* clear status fields */
168 log_info("Guardian: '%s' is running, set state to STARTED.",
169 (const char *) instance->options.instance_name.str);
170 instance->reset_stat();
171 instance->set_state(Instance::STARTED);
174 else
176 switch (instance->get_state()) {
177 case Instance::NOT_STARTED:
178 log_info("Guardian: starting '%s'...",
179 (const char *) instance->options.instance_name.str);
181 /* NOTE: set state to STARTING _before_ start() is called. */
182 instance->set_state(Instance::STARTING);
183 instance->last_checked= current_time;
185 instance->start_mysqld();
187 return;
189 case Instance::STARTED: /* fallthrough */
190 case Instance::STARTING: /* let the instance start or crash */
191 if (!instance->is_crashed())
192 return;
194 instance->crash_moment= current_time;
195 instance->last_checked= current_time;
196 instance->set_state(Instance::JUST_CRASHED);
197 /* fallthrough -- restart an instance immediately */
199 case Instance::JUST_CRASHED:
200 if (current_time - instance->crash_moment <= 2)
202 if (instance->is_crashed())
204 instance->start_mysqld();
205 log_info("Guardian: starting '%s'...",
206 (const char *) instance->options.instance_name.str);
209 else
210 instance->set_state(Instance::CRASHED);
212 return;
214 case Instance::CRASHED: /* just regular restarts */
215 if ((ulong) (current_time - instance->last_checked) <=
216 (ulong) Options::Main::monitoring_interval)
217 return;
219 if (instance->restart_counter < restart_retry)
221 if (instance->is_crashed())
223 instance->start_mysqld();
224 instance->last_checked= current_time;
226 log_info("Guardian: restarting '%s'...",
227 (const char *) instance->options.instance_name.str);
230 else
232 log_info("Guardian: can not start '%s'. "
233 "Abandoning attempts to (re)start it",
234 (const char *) instance->options.instance_name.str);
236 instance->set_state(Instance::CRASHED_AND_ABANDONED);
239 return;
241 case Instance::CRASHED_AND_ABANDONED:
242 return; /* do nothing */
244 default:
245 DBUG_ASSERT(0);
252 Main function of Guardian thread.
254 SYNOPSIS
255 run()
257 DESCRIPTION
258 Check for all guarded instances and restart them if needed.
261 void Guardian::run()
263 struct timespec timeout;
265 log_info("Guardian: started.");
267 thread_registry->register_thread(&thread_info);
269 /* Loop, until all instances were shut down at the end. */
271 while (true)
273 Instance_map::Iterator instances_it(instance_map);
274 Instance *instance;
275 bool all_instances_stopped= TRUE;
277 instance_map->lock();
279 while ((instance= instances_it.next()))
281 instance->lock();
283 if (!instance->is_guarded() ||
284 instance->get_state() == Instance::STOPPED)
286 instance->unlock();
287 continue;
290 process_instance(instance);
292 if (instance->get_state() != Instance::STOPPED)
293 all_instances_stopped= FALSE;
295 instance->unlock();
298 instance_map->unlock();
300 lock();
302 if (shutdown_requested && all_instances_stopped)
304 log_info("Guardian: all guarded mysqlds stopped.");
306 stopped= TRUE;
307 unlock();
308 break;
311 set_timespec(timeout, Options::Main::monitoring_interval);
313 thread_registry->cond_timedwait(&thread_info, &COND_guardian,
314 &LOCK_guardian, &timeout);
315 unlock();
318 log_info("Guardian: stopped.");
320 /* Now, when the Guardian is stopped we can stop the IM. */
322 thread_registry->unregister_thread(&thread_info);
323 thread_registry->request_shutdown();
325 log_info("Guardian: finished.");
330 Return the value of stopped flag.
333 bool Guardian::is_stopped()
335 int var;
337 lock();
338 var= stopped;
339 unlock();
341 return var;
346 Wake up Guardian thread.
348 MT-NOTE: though usually the mutex associated with condition variable should
349 be acquired before signalling the variable, here this is not needed.
350 Signalling under locked mutex is used to avoid lost signals. In the current
351 logic however locking mutex does not guarantee that the signal will not be
352 lost.
355 void Guardian::ping()
357 pthread_cond_signal(&COND_guardian);
362 Prepare list of instances.
364 SYNOPSIS
365 init()
367 MT-NOTE: Instance Map must be locked before calling the operation.
370 void Guardian::init()
372 Instance *instance;
373 Instance_map::Iterator iterator(instance_map);
375 while ((instance= iterator.next()))
377 instance->lock();
379 instance->reset_stat();
380 instance->set_state(Instance::NOT_STARTED);
382 instance->unlock();
388 An internal method which is called at shutdown to unregister instances and
389 attempt to stop them if requested.
391 SYNOPSIS
392 stop_instances()
394 DESCRIPTION
395 Loops through the guarded_instances list and prepares them for shutdown.
396 For each instance we issue a stop command and change the state
397 accordingly.
399 NOTE
400 Guardian object should be locked by the caller.
404 void Guardian::stop_instances()
406 static const int NUM_STOP_ATTEMPTS = 100;
408 Instance_map::Iterator instances_it(instance_map);
409 Instance *instance;
411 instance_map->lock();
413 while ((instance= instances_it.next()))
415 instance->lock();
417 if (!instance->is_guarded() ||
418 instance->get_state() == Instance::STOPPED)
420 instance->unlock();
421 continue;
425 If instance is running or was running (and now probably hanging),
426 request stop.
429 if (instance->is_mysqld_running() ||
430 instance->get_state() == Instance::STARTED)
432 instance->set_state(Instance::STOPPING);
433 instance->last_checked= time(NULL);
435 else
437 /* Otherwise mark it as STOPPED. */
438 instance->set_state(Instance::STOPPED);
441 /* Request mysqld to stop. */
443 bool instance_stopped= FALSE;
445 for (int cur_attempt= 0; cur_attempt < NUM_STOP_ATTEMPTS; ++cur_attempt)
447 if (!instance->kill_mysqld(SIGTERM))
449 instance_stopped= TRUE;
450 break;
453 if (!instance->is_active())
455 instance_stopped= TRUE;
456 break;
459 /* Sleep for 0.3 sec and check again. */
461 my_sleep(300000);
465 Abort if we failed to stop mysqld instance. That should not happen,
466 but if it happened, we don't know what to do and prefer to have clear
467 failure with coredump.
470 DBUG_ASSERT(instance_stopped);
472 instance->unlock();
475 instance_map->unlock();
480 Lock Guardian.
483 void Guardian::lock()
485 pthread_mutex_lock(&LOCK_guardian);
490 Unlock Guardian.
493 void Guardian::unlock()
495 pthread_mutex_unlock(&LOCK_guardian);