Patch to remove segfault on the exiting of a service.
[openais.git] / exec / amfcluster.c
blob76f28465a77c54e3f7c649654f2ccf5a6d6064df
2 /** @file amfcluster.c
3 *
4 * Copyright (c) 2006 Ericsson AB.
5 * Author: Hans Feldt, Anders Eriksson, Lars Holm
6 * - Refactoring of code into several AMF files
7 * - Constructors/destructors
8 * - Serializers/deserializers
10 * All rights reserved.
13 * This software licensed under BSD license, the text of which follows:
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions are met:
18 * - Redistributions of source code must retain the above copyright notice,
19 * this list of conditions and the following disclaimer.
20 * - Redistributions in binary form must reproduce the above copyright notice,
21 * this list of conditions and the following disclaimer in the documentation
22 * and/or other materials provided with the distribution.
23 * - Neither the name of the MontaVista Software, Inc. nor the names of its
24 * contributors may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
37 * THE POSSIBILITY OF SUCH DAMAGE.
39 * AMF Cluster Class Implementation
41 * This file contains functions for handling the AMF cluster. It can be
42 * viewed as the implementation of the AMF Cluster class
43 * as described in SAI-Overview-B.02.01. The SA Forum specification
44 * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
45 * and is referred to as 'the spec' below.
47 * The functions in this file are responsible for:
48 * - to start the cluster initially
49 * - to handle the administrative operation support for the cluster (FUTURE)
51 * The cluster class contains the following state machines:
52 * - administrative state machine (ADSM)
53 * - availability control state machine (ACSM)
55 * The administrative state machine will be implemented in the future.
57 * ACSM handles initial start of the cluster. In the future it will also handle
58 * administrative commands on the cluster as described in paragraph 7.4 of the
59 * spec. ACSM includes two stable states (UNINSTANTIATED and STARTED) and a
60 * number of states to control the transition between the stable states.
62 * The cluster is in state UNINSTANTIATED when the cluster starts. (In the
63 * future this state will also be assumed after the LOCK_INSTANTIATION
64 * administrative command.)
66 * State STARTED is assumed when the cluster has been initially started and
67 * will in the future be re-assumed after the administrative command RESTART
68 * has been executed.
70 * 1. Cluster Availability Control State Machine
71 * =============================================
73 * 1.1 State Transition Table
75 * State: Event: Action: New state:
76 * ===========================================================================
77 * UNINSTANTIATED sync_ready [C1] A2,A1 STARTING_APPS
78 * STARTING_APPS sync_ready A2,A1 STARTING_APPS
79 * STARTING_APPS app_started [C3] A7,A3 ASSIGNING_WORKLOAD
80 * STARTING_APPS local_timer_expired A8 STARTING_APPS
81 * STARTING_APPS time_out A7,A8 WAITING_OVERTIME_1
82 * WAITING_OVERTIME_1 sync_ready A4 WAITING_OVERTIME_1
83 * WAITING_OVERTIME_1 time_out [C2] A7 ASSIGNING_WORKLOAD
84 * WAITING_OVERTIME_1 time_out A7 WAITING_OVERTIME_2
85 * WAITING_OVERTIME_1 app_started [C2] A3 ASSIGNING_WORKLOAD
86 * WAITING_OVERTIME_2 sync_ready A4 WAITING_OVERTIME_2
87 * WAITING_OVERTIME_2 app_started [C2] A3 ASSIGNING_WORKLOAD
88 * ASSIGNING_WORKLOAD sync_ready A4 ASSIGNING_WORKLOAD
89 * ASSIGNING_WORKLOAD app_assigned [C4] A6 STARTED
90 * STARTED sync_ready A5 STARTED
92 * 1.2 State Description
93 * =====================
94 * UNINSTANTIATED - No SUs within any SG in any Application is instantiated.
95 * STARTING_APPLICATIONS - All applications have been requested to start
96 * their contained SGs, which in its turn has requested
97 * their contained SUs to instantiate all their
98 * components. The cluster startup timer is running.
99 * WAITING_OVERTIME_1 - The cluster startup timer has expired but all
100 * applications have not yet responded that they have been
101 * started. The time-out message is broadcast again to
102 * make sure there are no other broadcast messages pending.
103 * (This assures first of all that there is no pending
104 * 'component instantiate' message.)
105 * WAITING_OVERTIME_2 - The cluster startup timer has expired but all
106 * applications have not yet responded that they have been
107 * started. Cluster will wait infinitely for the
108 * applications to respond. It is correct to do so even when
109 * the startup timer has expired, because the applications
110 * will report they are started as soon as there is no
111 * attempt to instantiate any of its components pending,
112 * because attempts to instantiate a component can not go on
113 * forever, see saAmfCompInstantiateTimeout,
114 * saAmfCompNumMaxInstantiateWithoutDelay and
115 * saAmfCompNumMaxInstantiateWithDelay.
116 * ASSIGNING_WORKLOAD - All applications have been requested to assign its
117 * specified workload to its service units according to
118 * the redundancy model specified by its SGs.
119 * STARTED - A best effort has been made to instantiate the components of all
120 * applications and assign the specified workload as close as possible
121 * to what is described in the configuration.
123 * 1.3 Actions
124 * ===========
125 * A1 - [foreach application in cluster]/start application
126 * A2 - start cluster startup timer
127 * A3 - [foreach application in cluster]/assign workload to application
128 * A4 - defer sync_ready event
129 * A5 - forward sync_ready to appropriate node object
130 * A6 - recall deferred event
131 * A7 - stop node local instance of cluster startup timer
132 * A8 - multicast 'cluster startup timer time-out' event (time_out)
134 * 1.4 Guards
135 * ==========
136 * C1 - Administrative state == UNLOCKED
137 * C2 - No SU has presence state == INSTANTIATING
138 * C3 - All SGs are fully instantiated
139 * C4 - No Application has Availability Control state == ASSIGNING_WORKLOAD
143 #include <stdlib.h>
144 #include <errno.h>
146 #include "logsys.h"
147 #include "amf.h"
148 #include "util.h"
149 #include "main.h"
150 #include "service.h"
152 LOGSYS_DECLARE_SUBSYS ("AMF", LOG_INFO);
154 typedef struct cluster_event {
155 amf_cluster_event_type_t event_type;
156 amf_cluster_t *cluster;
157 amf_node_t *node;
158 } cluster_event_t;
160 /******************************************************************************
161 * Internal (static) utility functions
162 *****************************************************************************/
164 static void cluster_defer_event (amf_cluster_event_type_t event_type,
165 struct amf_cluster *cluster, struct amf_node * node)
167 cluster_event_t sync_ready_event = {event_type, cluster, node};
168 amf_fifo_put (event_type, &cluster->deferred_events,
169 sizeof (cluster_event_t),
170 &sync_ready_event);
173 static void cluster_recall_deferred_events (amf_cluster_t *cluster)
175 cluster_event_t cluster_event;
177 if (amf_fifo_get (&cluster->deferred_events, &cluster_event)) {
178 switch (cluster_event.event_type) {
179 case CLUSTER_SYNC_READY_EV:
180 log_printf (LOG_NOTICE,
181 "Recall CLUSTER_SYNC_READY_EV");
183 amf_node_sync_ready (cluster_event.node);
184 break;
185 default:
186 assert (0);
187 break;
192 static void timer_function_cluster_recall_deferred_events (void *data)
194 amf_cluster_t *cluster = (amf_cluster_t*)data;
196 ENTER ("");
197 cluster_recall_deferred_events (cluster);
201 * Determine if all applications are started so that all
202 * SUs is in SA_AMF_PRESENCE_INSTANTIATED presense state
203 * @param cluster
205 * @return 1; All applications are started
207 static int cluster_applications_started_instantiated (struct amf_cluster *cluster)
209 int all_started = 1;
210 struct amf_application *app;
211 struct amf_sg *sg;
212 struct amf_su *su;
214 for (app = cluster->application_head; app != NULL; app = app->next) {
215 for (sg = app->sg_head; sg != NULL; sg = sg->next) {
216 for (su = sg->su_head; su != NULL; su = su->next) {
217 if (su->saAmfSUPresenceState != SA_AMF_PRESENCE_INSTANTIATED) {
218 all_started = 0;
219 goto done;
225 done:
226 return all_started;
230 * Determine if any SGs are in the process of instantiating their SUs.
231 * @param cluster
233 * @return 1; At least one SG is in the process of instantiating.
235 static int cluster_applications_are_starting_sgs(struct amf_cluster *cluster)
237 amf_application_t *application;
238 amf_sg_t *sg;
239 amf_su_t *su;
240 int is_starting_sgs = 0;
242 for (application = cluster->application_head; application != NULL;
243 application = application->next) {
244 for (sg = application->sg_head; sg != NULL; sg = sg->next) {
245 for (su = sg->su_head; su != NULL; su = su->next) {
247 if (su->saAmfSUPresenceState ==
248 SA_AMF_PRESENCE_INSTANTIATING) {
249 is_starting_sgs = 1;
250 break;
255 return is_starting_sgs;
258 static void amf_cluster_assign_workload (struct amf_cluster *cluster)
260 struct amf_application *app;
261 ENTER ("");
263 for (app = cluster->application_head; app != NULL; app = app->next) {
264 amf_application_assign_workload (app, NULL);
268 static void acsm_cluster_enter_assigning_workload (struct amf_cluster *cluster)
270 log_printf(LOG_NOTICE,
271 "Cluster: all applications started, assigning workload.");
272 cluster->acsm_state = CLUSTER_AC_ASSIGNING_WORKLOAD;
273 amf_cluster_assign_workload (cluster);
276 static void timer_function_cluster_assign_workload_tmo (void *cluster)
278 ((struct amf_cluster*)cluster)->timeout_handle = 0;
280 ENTER ("");
282 amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_CLUSTER_START_TMO, &this_amf_node->name,
283 sizeof(SaNameT));
286 static inline void stop_cluster_startup_timer (struct amf_cluster *cluster)
288 if (cluster->timeout_handle) {
289 dprintf ("Stop cluster startup timer");
290 poll_timer_delete (aisexec_poll_handle,
291 cluster->timeout_handle);
292 cluster->timeout_handle = 0;
296 static void start_cluster_startup_timer (struct amf_cluster *cluster)
298 if (cluster->timeout_handle == 0) {
299 poll_timer_add (aisexec_poll_handle,
300 cluster->saAmfClusterStartupTimeout,
301 cluster,
302 timer_function_cluster_assign_workload_tmo,
303 &cluster->timeout_handle);
307 static inline void cluster_enter_starting_applications (
308 struct amf_cluster *cluster)
310 ENTER ("");
311 start_cluster_startup_timer (cluster);
312 amf_cluster->acsm_state = CLUSTER_AC_STARTING_APPLICATIONS;
313 amf_cluster_start_applications (cluster);
316 static void acsm_cluster_enter_started (amf_cluster_t *cluster)
318 ENTER ("");
319 amf_cluster->acsm_state = CLUSTER_AC_STARTED;
320 amf_call_function_asynchronous (
321 timer_function_cluster_recall_deferred_events, cluster);
324 /******************************************************************************
325 * Event methods
326 *****************************************************************************/
328 void amf_cluster_start_tmo_event (int is_sync_masterm,
329 struct amf_cluster *cluster, SaNameT *sourceNodeName)
331 ENTER ("acsm_state = %d", amf_cluster->acsm_state);
333 stop_cluster_startup_timer (cluster);
335 switch (cluster->acsm_state) {
336 case CLUSTER_AC_WAITING_OVER_TIME_1:
337 if (cluster_applications_are_starting_sgs (cluster)) {
338 dprintf ("Cluster startup timeout,"
339 "start waiting over time");
340 amf_cluster->acsm_state =
341 CLUSTER_AC_WAITING_OVER_TIME_2;
342 } else {
343 dprintf ("Cluster startup timeout,"
344 " assigning workload");
345 acsm_cluster_enter_assigning_workload (cluster);
347 break;
348 case CLUSTER_AC_STARTING_APPLICATIONS:
349 cluster->acsm_state = CLUSTER_AC_WAITING_OVER_TIME_1;
350 if (name_match (&this_amf_node->name, sourceNodeName)) {
351 timer_function_cluster_assign_workload_tmo (cluster);
354 break;
355 case CLUSTER_AC_ASSIGNING_WORKLOAD:
356 /* ignore cluster startup timer expiration */
357 case CLUSTER_AC_STARTED:
358 /* ignore cluster startup timer expiration */
359 case CLUSTER_AC_WAITING_OVER_TIME_2:
360 /* ignore cluster startup timer expiration */
361 break;
362 default:
363 log_printf(LOG_LEVEL_ERROR, "Cluster timout expired"
364 " in wrong cluster"
365 " state = %d", cluster->acsm_state);
366 assert(0);
367 break;
373 * Start all applications in the cluster and start
374 * the cluster startup timeout.
375 * @param cluster
376 * @param app
378 void amf_cluster_start_applications(struct amf_cluster *cluster)
380 struct amf_application *app;
381 for (app = cluster->application_head; app != NULL; app = app->next) {
382 amf_application_start (app, NULL);
387 * A new node has joined the cluster and is now synchronized with the nodes that
388 * was part of the cluster before.
389 * @param cluster
390 * @param node
392 void amf_cluster_sync_ready (struct amf_cluster *cluster, struct amf_node *node)
394 ENTER ("");
395 switch (amf_cluster->acsm_state) {
396 case CLUSTER_AC_UNINSTANTIATED:
397 if (amf_cluster->saAmfClusterAdminState ==
398 SA_AMF_ADMIN_UNLOCKED) {
399 cluster_enter_starting_applications (cluster);
401 break;
402 case CLUSTER_AC_STARTING_APPLICATIONS:
403 cluster_enter_starting_applications(cluster);
404 break;
405 case CLUSTER_AC_ASSIGNING_WORKLOAD:
407 * Defer assigning workload to those syncronized nodes to
408 * CLUSTER_AC_STARTED state.
410 cluster_defer_event (CLUSTER_SYNC_READY_EV, cluster,
411 node);
412 break;
413 case CLUSTER_AC_WAITING_OVER_TIME_2:
415 * Defer assigning workload to those syncronized nodes to
416 * CLUSTER_AC_STARTED state.
418 cluster_defer_event (CLUSTER_SYNC_READY_EV, cluster,
419 node);
420 break;
421 case CLUSTER_AC_STARTED:
422 TRACE1 ("Node sync ready sent from cluster in "
423 "CLUSTER_AC_STARTED state");
424 amf_node_sync_ready (node);
425 break;
427 default:
428 log_printf(LOG_LEVEL_ERROR, "Cluster sync ready event"
429 " received in wrong cluster"
430 " state = %d", cluster->acsm_state);
431 assert (0);
432 break;
436 /******************************************************************************
437 * Event response methods
438 *****************************************************************************/
441 * An application indicates it has been started or the application indicates it
442 * was not even possible to try to start because the required nodes were not
443 * available.
444 * @param cluster
445 * @param application
447 void amf_cluster_application_started (
448 struct amf_cluster *cluster, struct amf_application *application)
450 ENTER ("application '%s' started %d", application->name.value,
451 cluster->acsm_state);
452 switch (cluster->acsm_state) {
453 case CLUSTER_AC_STARTING_APPLICATIONS:
454 if (cluster_applications_started_instantiated (cluster)) {
455 stop_cluster_startup_timer (cluster);
456 acsm_cluster_enter_assigning_workload (cluster);
458 break;
459 case CLUSTER_AC_WAITING_OVER_TIME_1:
460 case CLUSTER_AC_WAITING_OVER_TIME_2:
461 if (amf_cluster_applications_started_with_no_starting_sgs (cluster)) {
462 acsm_cluster_enter_assigning_workload (cluster);
464 break;
465 default: {
466 log_printf (LOG_ERR,"Error invalid cluster availability state %d",
467 cluster->acsm_state);
468 openais_exit_error(cluster->acsm_state);
469 break;
475 * An application indicates it has assigned workload to all its contained SUs.
476 * @param cluster
478 void amf_cluster_application_workload_assigned (
479 struct amf_cluster *cluster, struct amf_application *app)
481 ENTER ("");
482 switch (cluster->acsm_state) {
483 case CLUSTER_AC_ASSIGNING_WORKLOAD:
484 log_printf (LOG_NOTICE, "Cluster: application %s assigned.",
485 app->name.value);
486 if (amf_cluster_applications_assigned (cluster)) {
487 acsm_cluster_enter_started (cluster);
489 break;
490 default:
491 assert(0);
492 break;
496 /******************************************************************************
497 * General methods
498 *****************************************************************************/
500 struct amf_cluster *amf_cluster_new (void)
502 struct amf_cluster *cluster = amf_calloc (1,
503 sizeof (struct amf_cluster));
505 cluster->saAmfClusterStartupTimeout = -1;
506 cluster->saAmfClusterAdminState = SA_AMF_ADMIN_UNLOCKED;
507 cluster->deferred_events = 0;
508 cluster->acsm_state = CLUSTER_AC_UNINSTANTIATED;
509 return cluster;
512 void *amf_cluster_serialize (struct amf_cluster *cluster, int *len)
514 char *buf = NULL;
515 int offset = 0, size = 0;
517 TRACE8 ("%s", cluster->name.value);
519 buf = amf_serialize_SaNameT (buf, &size, &offset, &cluster->name);
520 buf = amf_serialize_SaUint32T (buf, &size, &offset,
521 cluster->saAmfClusterStartupTimeout);
522 buf = amf_serialize_SaNameT (buf, &size, &offset,
523 &cluster->saAmfClusterClmCluster);
524 buf = amf_serialize_SaUint32T (buf, &size, &offset,
525 cluster->saAmfClusterAdminState);
526 buf = amf_serialize_SaUint32T (buf, &size, &offset, cluster->acsm_state);
528 *len = offset;
530 return buf;
533 struct amf_cluster *amf_cluster_deserialize (char *buf)
535 char *tmp = buf;
536 struct amf_cluster *cluster = amf_cluster_new ();
538 tmp = amf_deserialize_SaNameT (tmp, &cluster->name);
539 tmp = amf_deserialize_SaUint32T (tmp, &cluster->saAmfClusterStartupTimeout);
540 tmp = amf_deserialize_SaNameT (tmp, &cluster->saAmfClusterClmCluster);
541 tmp = amf_deserialize_SaUint32T (tmp, &cluster->saAmfClusterAdminState);
542 tmp = amf_deserialize_SaUint32T (tmp, &cluster->acsm_state);
544 return cluster;
/**
 * Determine that no SG is in the process of instantiating its SUs any more.
 * This is the negation of cluster_applications_are_starting_sgs().
 * @param cluster
 *
 * @return 1; no SU has presence state INSTANTIATING,
 *         0; at least one SU is still instantiating
 */
int amf_cluster_applications_started_with_no_starting_sgs (
	struct amf_cluster *cluster)
{
	return cluster_applications_are_starting_sgs (cluster) == 0;
}
560 * Determine if all Applications have been assigned workload.
561 * @param cluster
563 * @return 1; All Applications have been assigned workload.
565 int amf_cluster_applications_assigned (struct amf_cluster *cluster)
567 struct amf_application *app = 0;
568 int is_all_application_assigned = 1;
570 for (app = cluster->application_head; app != NULL; app = app->next) {
571 if (app->acsm_state != APP_AC_WORKLOAD_ASSIGNED) {
572 is_all_application_assigned = 0;
573 break;
576 return is_all_application_assigned;