release/src-rt-6.x.4708/linux/linux-2.6.36/kernel/rcutree_plugin.h

   1 /*
   2  * Read-Copy Update mechanism for mutual exclusion (tree-based version)
   3  * Internal non-public definitions that provide either classic
   4  * or preemptable semantics.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  19  *
  20  * Copyright Red Hat, 2009
  21  * Copyright IBM Corporation, 2009
  22  *
  23  * Author: Ingo Molnar <mingo@elte.hu>
  24  *         Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  25  */
  26
  27 #include <linux/delay.h>
  28
  29 /*
  30  * Check the RCU kernel configuration parameters and print informative
  31  * messages about anything out of the ordinary.  If you like #ifdef, you
  32  * will love this function.
  33  */
  34 static void __init rcu_bootup_announce_oddness(void)
  35 {
  36 #ifdef CONFIG_RCU_TRACE
  37         printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
  38 #endif
  39 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && \
  40         CONFIG_RCU_FANOUT != 32)
  41         printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
  42                CONFIG_RCU_FANOUT);
  43 #endif
  44 #ifdef CONFIG_RCU_FANOUT_EXACT
  45         printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
  46 #endif
  47 #ifdef CONFIG_RCU_FAST_NO_HZ
  48         printk(KERN_INFO
  49                "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
  50 #endif
  51 #ifdef CONFIG_PROVE_RCU
  52         printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
  53 #endif
  54 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
  55         printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
  56 #endif
  57 #ifndef CONFIG_RCU_CPU_STALL_DETECTOR
  58         printk(KERN_INFO
  59                "\tRCU-based detection of stalled CPUs is disabled.\n");
  60 #endif
  61 #ifndef CONFIG_RCU_CPU_STALL_VERBOSE
  62         printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
  63 #endif
  64 #if NUM_RCU_LVL_4 != 0
  65         printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
  66 #endif
  67 }
  68
  69 #ifdef CONFIG_TREE_PREEMPT_RCU
  70
  71 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
  72 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
  73
  74 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
  75
  76 /*
  77  * Tell them what RCU they are running.
  78  */
  79 static void __init rcu_bootup_announce(void)
  80 {
  81         printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n");
  82         rcu_bootup_announce_oddness();
  83 }
  84
  85 /*
  86  * Return the number of RCU-preempt batches processed thus far
  87  * for debug and statistics.
  88  */
  89 long rcu_batches_completed_preempt(void)
  90 {
  91         return rcu_preempt_state.completed;
  92 }
  93 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
  94
  95 /*
  96  * Return the number of RCU batches processed thus far for debug & stats.
  97  */
  98 long rcu_batches_completed(void)
  99 {
 100         return rcu_batches_completed_preempt();
 101 }
 102 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 103
 104 /*
 105  * Force a quiescent state for preemptible RCU.
 106  */
 107 void rcu_force_quiescent_state(void)
 108 {
 109         force_quiescent_state(&rcu_preempt_state, 0);
 110 }
 111 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 112
 113 /*
 114  * Record a preemptable-RCU quiescent state for the specified CPU.  Note
 115  * that this just means that the task currently running on the CPU is
 116  * not in a quiescent state.  There might be any number of tasks blocked
 117  * while in an RCU read-side critical section.
 118  *
 119  * Unlike the other rcu_*_qs() functions, callers to this function
 120  * must disable irqs in order to protect the assignment to
 121  * ->rcu_read_unlock_special.
 122  */
 123 static void rcu_preempt_qs(int cpu)
 124 {
 125         struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
 126
 127         rdp->passed_quiesc_completed = rdp->gpnum - 1;
 128         barrier();
 129         rdp->passed_quiesc = 1;
 130         current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 131 }
 132
 133 /*
 134  * We have entered the scheduler, and the current task might soon be
 135  * context-switched away from.  If this task is in an RCU read-side
 136  * critical section, we will no longer be able to rely on the CPU to
 137  * record that fact, so we enqueue the task on the appropriate entry
 138  * of the blocked_tasks[] array.  The task will dequeue itself when
 139  * it exits the outermost enclosing RCU read-side critical section.
 140  * Therefore, the current grace period cannot be permitted to complete
 141  * until the blocked_tasks[] entry indexed by the low-order bit of
 142  * rnp->gpnum empties.
 143  *
 144  * Caller must disable preemption.
 145  */
 146 static void rcu_preempt_note_context_switch(int cpu)
 147 {
 148         struct task_struct *t = current;
 149         unsigned long flags;
 150         int phase;
 151         struct rcu_data *rdp;
 152         struct rcu_node *rnp;
 153
 154         if (t->rcu_read_lock_nesting &&
 155             (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 156
 157                 /* Possibly blocking in an RCU read-side critical section. */
 158                 rdp = rcu_preempt_state.rda[cpu];
 159                 rnp = rdp->mynode;
 160                 raw_spin_lock_irqsave(&rnp->lock, flags);
 161                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
 162                 t->rcu_blocked_node = rnp;
 163
 164                 /*
 165                  * If this CPU has already checked in, then this task
 166                  * will hold up the next grace period rather than the
 167                  * current grace period.  Queue the task accordingly.
 168                  * If the task is queued for the current grace period
 169                  * (i.e., this CPU has not yet passed through a quiescent
 170                  * state for the current grace period), then as long
 171                  * as that task remains queued, the current grace period
 172                  * cannot end.
 173                  *
 174                  * But first, note that the current CPU must still be
 175                  * on line!
 176                  */
 177                 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
 178                 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
 179                 phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
 180                 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
 181                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 182         }
 183
 184         /*
 185          * Either we were not in an RCU read-side critical section to
 186          * begin with, or we have now recorded that critical section
 187          * globally.  Either way, we can now note a quiescent state
 188          * for this CPU.  Again, if we were in an RCU read-side critical
 189          * section, and if that critical section was blocking the current
 190          * grace period, then the fact that the task has been enqueued
 191          * means that we continue to block the current grace period.
 192          */
 193         local_irq_save(flags);
 194         rcu_preempt_qs(cpu);
 195         local_irq_restore(flags);
 196 }
 197
 198 /*
 199  * Tree-preemptable RCU implementation for rcu_read_lock().
 200  * Just increment ->rcu_read_lock_nesting, shared state will be updated
 201  * if we block.
 202  */
 203 void __rcu_read_lock(void)
 204 {
 205         ACCESS_ONCE(current->rcu_read_lock_nesting)++;
 206         barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */
 207 }
 208 EXPORT_SYMBOL_GPL(__rcu_read_lock);
 209
 210 /*
 211  * Check for preempted RCU readers blocking the current grace period
 212  * for the specified rcu_node structure.  If the caller needs a reliable
 213  * answer, it must hold the rcu_node's ->lock.
 214  */
 215 static int rcu_preempted_readers(struct rcu_node *rnp)
 216 {
 217         int phase = rnp->gpnum & 0x1;
 218
 219         return !list_empty(&rnp->blocked_tasks[phase]) ||
 220                !list_empty(&rnp->blocked_tasks[phase + 2]);
 221 }
 222
 223 /*
 224  * Record a quiescent state for all tasks that were previously queued
 225  * on the specified rcu_node structure and that were blocking the current
 226  * RCU grace period.  The caller must hold the specified rnp->lock with
 227  * irqs disabled, and this lock is released upon return, but irqs remain
 228  * disabled.
 229  */
 230 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 231         __releases(rnp->lock)
 232 {
 233         unsigned long mask;
 234         struct rcu_node *rnp_p;
 235
 236         if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
 237                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 238                 return;  /* Still need more quiescent states! */
 239         }
 240
 241         rnp_p = rnp->parent;
 242         if (rnp_p == NULL) {
 243                 /*
 244                  * Either there is only one rcu_node in the tree,
 245                  * or tasks were kicked up to root rcu_node due to
 246                  * CPUs going offline.
 247                  */
 248                 rcu_report_qs_rsp(&rcu_preempt_state, flags);
 249                 return;
 250         }
 251
 252         /* Report up the rest of the hierarchy. */
 253         mask = rnp->grpmask;
 254         raw_spin_unlock(&rnp->lock);    /* irqs remain disabled. */
 255         raw_spin_lock(&rnp_p->lock);    /* irqs already disabled. */
 256         rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
 257 }
 258
 259 /*
 260  * Handle special cases during rcu_read_unlock(), such as needing to
 261  * notify RCU core processing or task having blocked during the RCU
 262  * read-side critical section.
 263  */
 264 static void rcu_read_unlock_special(struct task_struct *t)
 265 {
 266         int empty;
 267         int empty_exp;
 268         unsigned long flags;
 269         struct rcu_node *rnp;
 270         int special;
 271
 272         /* NMI handlers cannot block and cannot safely manipulate state. */
 273         if (in_nmi())
 274                 return;
 275
 276         local_irq_save(flags);
 277
 278         /*
 279          * If RCU core is waiting for this CPU to exit critical section,
 280          * let it know that we have done so.
 281          */
 282         special = t->rcu_read_unlock_special;
 283         if (special & RCU_READ_UNLOCK_NEED_QS) {
 284                 rcu_preempt_qs(smp_processor_id());
 285         }
 286
 287         /* Hardware IRQ handlers cannot block. */
 288         if (in_irq()) {
 289                 local_irq_restore(flags);
 290                 return;
 291         }
 292
 293         /* Clean up if blocked during RCU read-side critical section. */
 294         if (special & RCU_READ_UNLOCK_BLOCKED) {
 295                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
 296
 297                 /*
 298                  * Remove this task from the list it blocked on.  The
 299                  * task can migrate while we acquire the lock, but at
 300                  * most one time.  So at most two passes through loop.
 301                  */
 302                 for (;;) {
 303                         rnp = t->rcu_blocked_node;
 304                         raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
 305                         if (rnp == t->rcu_blocked_node)
 306                                 break;
 307                         raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 308                 }
 309                 empty = !rcu_preempted_readers(rnp);
 310                 empty_exp = !rcu_preempted_readers_exp(rnp);
 311                 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
 312                 list_del_init(&t->rcu_node_entry);
 313                 t->rcu_blocked_node = NULL;
 314
 315                 /*
 316                  * If this was the last task on the current list, and if
 317                  * we aren't waiting on any CPUs, report the quiescent state.
 318                  * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
 319                  */
 320                 if (empty)
 321                         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 322                 else
 323                         rcu_report_unblock_qs_rnp(rnp, flags);
 324
 325                 /*
 326                  * If this was the last task on the expedited lists,
 327                  * then we need to report up the rcu_node hierarchy.
 328                  */
 329                 if (!empty_exp && !rcu_preempted_readers_exp(rnp))
 330                         rcu_report_exp_rnp(&rcu_preempt_state, rnp);
 331         } else {
 332                 local_irq_restore(flags);
 333         }
 334 }
 335
 336 /*
 337  * Tree-preemptable RCU implementation for rcu_read_unlock().
 338  * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
 339  * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 340  * invoke rcu_read_unlock_special() to clean up after a context switch
 341  * in an RCU read-side critical section and other special cases.
 342  */
 343 void __rcu_read_unlock(void)
 344 {
 345         struct task_struct *t = current;
 346
 347         barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
 348         if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
 349             unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
 350                 rcu_read_unlock_special(t);
 351 #ifdef CONFIG_PROVE_LOCKING
 352         WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
 353 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 354 }
 355 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 356
 357 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 358
 359 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
 360
 361 /*
 362  * Dump detailed information for all tasks blocking the current RCU
 363  * grace period on the specified rcu_node structure.
 364  */
 365 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
 366 {
 367         unsigned long flags;
 368         struct list_head *lp;
 369         int phase;
 370         struct task_struct *t;
 371
 372         if (rcu_preempted_readers(rnp)) {
 373                 raw_spin_lock_irqsave(&rnp->lock, flags);
 374                 phase = rnp->gpnum & 0x1;
 375                 lp = &rnp->blocked_tasks[phase];
 376                 list_for_each_entry(t, lp, rcu_node_entry)
 377                         sched_show_task(t);
 378                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 379         }
 380 }
 381
 382 /*
 383  * Dump detailed information for all tasks blocking the current RCU
 384  * grace period.
 385  */
 386 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
 387 {
 388         struct rcu_node *rnp = rcu_get_root(rsp);
 389
 390         rcu_print_detail_task_stall_rnp(rnp);
 391         rcu_for_each_leaf_node(rsp, rnp)
 392                 rcu_print_detail_task_stall_rnp(rnp);
 393 }
 394
 395 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
 396
 397 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
 398 {
 399 }
 400
 401 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
 402
 403 /*
 404  * Scan the current list of tasks blocked within RCU read-side critical
 405  * sections, printing out the tid of each.
 406  */
 407 static void rcu_print_task_stall(struct rcu_node *rnp)
 408 {
 409         struct list_head *lp;
 410         int phase;
 411         struct task_struct *t;
 412
 413         if (rcu_preempted_readers(rnp)) {
 414                 phase = rnp->gpnum & 0x1;
 415                 lp = &rnp->blocked_tasks[phase];
 416                 list_for_each_entry(t, lp, rcu_node_entry)
 417                         printk(" P%d", t->pid);
 418         }
 419 }
 420
 421 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 422
 423 /*
 424  * Check that the list of blocked tasks for the newly completed grace
 425  * period is in fact empty.  It is a serious bug to complete a grace
 426  * period that still has RCU readers blocked!  This function must be
 427  * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
 428  * must be held by the caller.
 429  */
 430 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 431 {
 432         WARN_ON_ONCE(rcu_preempted_readers(rnp));
 433         WARN_ON_ONCE(rnp->qsmask);
 434 }
 435
 436 #ifdef CONFIG_HOTPLUG_CPU
 437
 438 /*
 439  * Handle tasklist migration for case in which all CPUs covered by the
 440  * specified rcu_node have gone offline.  Move them up to the root
 441  * rcu_node.  The reason for not just moving them to the immediate
 442  * parent is to remove the need for rcu_read_unlock_special() to
 443  * make more than two attempts to acquire the target rcu_node's lock.
 444  * Returns true if there were tasks blocking the current RCU grace
 445  * period.
 446  *
 447  * Returns 1 if there was previously a task blocking the current grace
 448  * period on the specified rcu_node structure.
 449  *
 450  * The caller must hold rnp->lock with irqs disabled.
 451  */
 452 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 453                                      struct rcu_node *rnp,
 454                                      struct rcu_data *rdp)
 455 {
 456         int i;
 457         struct list_head *lp;
 458         struct list_head *lp_root;
 459         int retval = 0;
 460         struct rcu_node *rnp_root = rcu_get_root(rsp);
 461         struct task_struct *tp;
 462
 463         if (rnp == rnp_root) {
 464                 WARN_ONCE(1, "Last CPU thought to be offlined?");
 465                 return 0;  /* Shouldn't happen: at least one CPU online. */
 466         }
 467         WARN_ON_ONCE(rnp != rdp->mynode &&
 468                      (!list_empty(&rnp->blocked_tasks[0]) ||
 469                       !list_empty(&rnp->blocked_tasks[1]) ||
 470                       !list_empty(&rnp->blocked_tasks[2]) ||
 471                       !list_empty(&rnp->blocked_tasks[3])));
 472
 473         /*
 474          * Move tasks up to root rcu_node.  Rely on the fact that the
 475          * root rcu_node can be at most one ahead of the rest of the
 476          * rcu_nodes in terms of gp_num value.  This fact allows us to
 477          * move the blocked_tasks[] array directly, element by element.
 478          */
 479         if (rcu_preempted_readers(rnp))
 480                 retval |= RCU_OFL_TASKS_NORM_GP;
 481         if (rcu_preempted_readers_exp(rnp))
 482                 retval |= RCU_OFL_TASKS_EXP_GP;
 483         for (i = 0; i < 4; i++) {
 484                 lp = &rnp->blocked_tasks[i];
 485                 lp_root = &rnp_root->blocked_tasks[i];
 486                 while (!list_empty(lp)) {
 487                         tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
 488                         raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
 489                         list_del(&tp->rcu_node_entry);
 490                         tp->rcu_blocked_node = rnp_root;
 491                         list_add(&tp->rcu_node_entry, lp_root);
 492                         raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
 493                 }
 494         }
 495         return retval;
 496 }
 497
 498 /*
 499  * Do CPU-offline processing for preemptable RCU.
 500  */
 501 static void rcu_preempt_offline_cpu(int cpu)
 502 {
 503         __rcu_offline_cpu(cpu, &rcu_preempt_state);
 504 }
 505
 506 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 507
 508 /*
 509  * Check for a quiescent state from the current CPU.  When a task blocks,
 510  * the task is recorded in the corresponding CPU's rcu_node structure,
 511  * which is checked elsewhere.
 512  *
 513  * Caller must disable hard irqs.
 514  */
 515 static void rcu_preempt_check_callbacks(int cpu)
 516 {
 517         struct task_struct *t = current;
 518
 519         if (t->rcu_read_lock_nesting == 0) {
 520                 rcu_preempt_qs(cpu);
 521                 return;
 522         }
 523         if (per_cpu(rcu_preempt_data, cpu).qs_pending)
 524                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 525 }
 526
 527 /*
 528  * Process callbacks for preemptable RCU.
 529  */
 530 static void rcu_preempt_process_callbacks(void)
 531 {
 532         __rcu_process_callbacks(&rcu_preempt_state,
 533                                 &__get_cpu_var(rcu_preempt_data));
 534 }
 535
 536 /*
 537  * Queue a preemptable-RCU callback for invocation after a grace period.
 538  */
 539 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 540 {
 541         __call_rcu(head, func, &rcu_preempt_state);
 542 }
 543 EXPORT_SYMBOL_GPL(call_rcu);
 544
 545 /**
 546  * synchronize_rcu - wait until a grace period has elapsed.
 547  *
 548  * Control will return to the caller some time after a full grace
 549  * period has elapsed, in other words after all currently executing RCU
 550  * read-side critical sections have completed.  RCU read-side critical
 551  * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 552  * and may be nested.
 553  */
 554 void synchronize_rcu(void)
 555 {
 556         struct rcu_synchronize rcu;
 557
 558         if (!rcu_scheduler_active)
 559                 return;
 560
 561         init_rcu_head_on_stack(&rcu.head);
 562         init_completion(&rcu.completion);
 563         /* Will wake me after RCU finished. */
 564         call_rcu(&rcu.head, wakeme_after_rcu);
 565         /* Wait for it. */
 566         wait_for_completion(&rcu.completion);
 567         destroy_rcu_head_on_stack(&rcu.head);
 568 }
 569 EXPORT_SYMBOL_GPL(synchronize_rcu);
 570
 571 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
 572 static long sync_rcu_preempt_exp_count;
 573 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
 574
 575 /*
 576  * Return non-zero if there are any tasks in RCU read-side critical
 577  * sections blocking the current preemptible-RCU expedited grace period.
 578  * If there is no preemptible-RCU expedited grace period currently in
 579  * progress, returns zero unconditionally.
 580  */
 581 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
 582 {
 583         return !list_empty(&rnp->blocked_tasks[2]) ||
 584                !list_empty(&rnp->blocked_tasks[3]);
 585 }
 586
 587 /*
 588  * return non-zero if there is no RCU expedited grace period in progress
 589  * for the specified rcu_node structure, in other words, if all CPUs and
 590  * tasks covered by the specified rcu_node structure have done their bit
 591  * for the current expedited grace period.  Works only for preemptible
 592  * RCU -- other RCU implementation use other means.
 593  *
 594  * Caller must hold sync_rcu_preempt_exp_mutex.
 595  */
 596 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
 597 {
 598         return !rcu_preempted_readers_exp(rnp) &&
 599                ACCESS_ONCE(rnp->expmask) == 0;
 600 }
 601
 602 /*
 603  * Report the exit from RCU read-side critical section for the last task
 604  * that queued itself during or before the current expedited preemptible-RCU
 605  * grace period.  This event is reported either to the rcu_node structure on
 606  * which the task was queued or to one of that rcu_node structure's ancestors,
 607  * recursively up the tree.  (Calm down, calm down, we do the recursion
 608  * iteratively!)
 609  *
 610  * Caller must hold sync_rcu_preempt_exp_mutex.
 611  */
 612 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 613 {
 614         unsigned long flags;
 615         unsigned long mask;
 616
 617         raw_spin_lock_irqsave(&rnp->lock, flags);
 618         for (;;) {
 619                 if (!sync_rcu_preempt_exp_done(rnp))
 620                         break;
 621                 if (rnp->parent == NULL) {
 622                         wake_up(&sync_rcu_preempt_exp_wq);
 623                         break;
 624                 }
 625                 mask = rnp->grpmask;
 626                 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
 627                 rnp = rnp->parent;
 628                 raw_spin_lock(&rnp->lock); /* irqs already disabled */
 629                 rnp->expmask &= ~mask;
 630         }
 631         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 632 }
 633
 634 /*
 635  * Snapshot the tasks blocking the newly started preemptible-RCU expedited
 636  * grace period for the specified rcu_node structure.  If there are no such
 637  * tasks, report it up the rcu_node hierarchy.
 638  *
 639  * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
 640  */
 641 static void
 642 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 643 {
 644         int must_wait;
 645
 646         raw_spin_lock(&rnp->lock); /* irqs already disabled */
 647         list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
 648         list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
 649         must_wait = rcu_preempted_readers_exp(rnp);
 650         raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
 651         if (!must_wait)
 652                 rcu_report_exp_rnp(rsp, rnp);
 653 }
 654
 655 /*
 656  * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
 657  * is to invoke synchronize_sched_expedited() to push all the tasks to
 658  * the ->blocked_tasks[] lists, move all entries from the first set of
 659  * ->blocked_tasks[] lists to the second set, and finally wait for this
 660  * second set to drain.
 661  */
 662 void synchronize_rcu_expedited(void)
 663 {
 664         unsigned long flags;
 665         struct rcu_node *rnp;
 666         struct rcu_state *rsp = &rcu_preempt_state;
 667         long snap;
 668         int trycount = 0;
 669
 670         smp_mb(); /* Caller's modifications seen first by other CPUs. */
 671         snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
 672         smp_mb(); /* Above access cannot bleed into critical section. */
 673
 674         /*
 675          * Acquire lock, falling back to synchronize_rcu() if too many
 676          * lock-acquisition failures.  Of course, if someone does the
 677          * expedited grace period for us, just leave.
 678          */
 679         while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
 680                 if (trycount++ < 10)
 681                         udelay(trycount * num_online_cpus());
 682                 else {
 683                         synchronize_rcu();
 684                         return;
 685                 }
 686                 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 687                         goto mb_ret; /* Others did our work for us. */
 688         }
 689         if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 690                 goto unlock_mb_ret; /* Others did our work for us. */
 691
 692         /* force all RCU readers onto blocked_tasks[]. */
 693         synchronize_sched_expedited();
 694
 695         raw_spin_lock_irqsave(&rsp->onofflock, flags);
 696
 697         /* Initialize ->expmask for all non-leaf rcu_node structures. */
 698         rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
 699                 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
 700                 rnp->expmask = rnp->qsmaskinit;
 701                 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 702         }
 703
 704         /* Snapshot current state of ->blocked_tasks[] lists. */
 705         rcu_for_each_leaf_node(rsp, rnp)
 706                 sync_rcu_preempt_exp_init(rsp, rnp);
 707         if (NUM_RCU_NODES > 1)
 708                 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
 709
 710         raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 711
 712         /* Wait for snapshotted ->blocked_tasks[] lists to drain. */
 713         rnp = rcu_get_root(rsp);
 714         wait_event(sync_rcu_preempt_exp_wq,
 715                    sync_rcu_preempt_exp_done(rnp));
 716
 717         /* Clean up and exit. */
 718         smp_mb(); /* ensure expedited GP seen before counter increment. */
 719         ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
 720 unlock_mb_ret:
 721         mutex_unlock(&sync_rcu_preempt_exp_mutex);
 722 mb_ret:
 723         smp_mb(); /* ensure subsequent action seen after grace period. */
 724 }
 725 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 726
 727 /*
 728  * Check to see if there is any immediate preemptable-RCU-related work
 729  * to be done.
 730  */
 731 static int rcu_preempt_pending(int cpu)
 732 {
 733         return __rcu_pending(&rcu_preempt_state,
 734                              &per_cpu(rcu_preempt_data, cpu));
 735 }
 736
 737 /*
 738  * Does preemptable RCU need the CPU to stay out of dynticks mode?
 739  */
 740 static int rcu_preempt_needs_cpu(int cpu)
 741 {
 742         return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
 743 }
 744
 745 /**
 746  * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 747  */
 748 void rcu_barrier(void)
 749 {
 750         _rcu_barrier(&rcu_preempt_state, call_rcu);
 751 }
 752 EXPORT_SYMBOL_GPL(rcu_barrier);
 753
 754 /*
 755  * Initialize preemptable RCU's per-CPU data.
 756  */
 757 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 758 {
 759         rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
 760 }
 761
 762 /*
 763  * Move preemptable RCU's callbacks to ->orphan_cbs_list.
 764  */
 765 static void rcu_preempt_send_cbs_to_orphanage(void)
 766 {
 767         rcu_send_cbs_to_orphanage(&rcu_preempt_state);
 768 }
 769
 770 /*
 771  * Initialize preemptable RCU's state structures.
 772  */
 773 static void __init __rcu_init_preempt(void)
 774 {
 775         RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
 776 }
 777
 778 /*
 779  * Check for a task exiting while in a preemptable-RCU read-side
 780  * critical section, clean up if so.  No need to issue warnings,
 781  * as debug_check_no_locks_held() already does this if lockdep
 782  * is enabled.
 783  */
 784 void exit_rcu(void)
 785 {
 786         struct task_struct *t = current;
 787
 788         if (t->rcu_read_lock_nesting == 0)
 789                 return;
 790         t->rcu_read_lock_nesting = 1;
 791         rcu_read_unlock();
 792 }
 793
 794 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 795
 796 /*
 797  * Tell them what RCU they are running.
 798  */
 799 static void __init rcu_bootup_announce(void)
 800 {
 801         printk(KERN_INFO "Hierarchical RCU implementation.\n");
 802         rcu_bootup_announce_oddness();
 803 }
 804
/*
 * Return the number of RCU batches processed thus far for debug & stats.
 * In this build the value is whatever rcu_batches_completed_sched()
 * reports.
 */
long rcu_batches_completed(void)
{
        return rcu_batches_completed_sched();
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);
 813
/*
 * Force a quiescent state for RCU, which, because there is no preemptible
 * RCU, becomes the same as rcu-sched: the call is forwarded unchanged to
 * rcu_sched_force_quiescent_state().
 */
void rcu_force_quiescent_state(void)
{
        rcu_sched_force_quiescent_state();
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 823
/*
 * Because preemptable RCU does not exist, we never have to check for
 * CPUs being in quiescent states.  This stub ignores its cpu argument
 * and does nothing.
 */
static void rcu_preempt_note_context_switch(int cpu)
{
}
 831
/*
 * Because preemptable RCU does not exist, there are never any preempted
 * RCU readers.  Always returns 0 without examining rnp.
 */
static int rcu_preempted_readers(struct rcu_node *rnp)
{
        return 0;
}
 840
 841 #ifdef CONFIG_HOTPLUG_CPU
 842
/*
 * Because preemptible RCU does not exist, no quieting of tasks.
 * The only action is to release rnp->lock and restore the interrupt
 * state saved in flags.
 * NOTE(review): callers presumably enter with rnp->lock held and
 * interrupts disabled -- confirm against the call sites.
 */
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
{
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
 848
 849 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 850
 851 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 852
/*
 * Because preemptable RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.  This stub
 * ignores rsp and prints nothing.
 */
static void rcu_print_detail_task_stall(struct rcu_state *rsp)
{
}
 860
/*
 * Because preemptable RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.  This stub
 * ignores rnp and prints nothing.
 */
static void rcu_print_task_stall(struct rcu_node *rnp)
{
}
 868
 869 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 870
/*
 * Because there is no preemptable RCU, there can be no readers blocked,
 * so there is no need to check for blocked tasks.  So check only for
 * bogus qsmask values: a one-time warning is issued if rnp->qsmask is
 * nonzero.
 */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
        WARN_ON_ONCE(rnp->qsmask);
}
 880
 881 #ifdef CONFIG_HOTPLUG_CPU
 882
/*
 * Because preemptable RCU does not exist, it never needs to migrate
 * tasks that were blocked within RCU read-side critical sections, and
 * such non-existent tasks cannot possibly have been blocking the current
 * grace period.  Always returns 0; all three arguments are ignored.
 */
static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
                                     struct rcu_node *rnp,
                                     struct rcu_data *rdp)
{
        return 0;
}
 895
/*
 * Because preemptable RCU does not exist, it never needs CPU-offline
 * processing.  This stub ignores its cpu argument and does nothing.
 */
static void rcu_preempt_offline_cpu(int cpu)
{
}
 903
 904 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 905
/*
 * Because preemptable RCU does not exist, it never has any callbacks
 * to check.  This stub ignores its cpu argument and does nothing.
 */
static void rcu_preempt_check_callbacks(int cpu)
{
}
 913
/*
 * Because preemptable RCU does not exist, it never has any callbacks
 * to process.  This stub does nothing.
 */
static void rcu_preempt_process_callbacks(void)
{
}
 921
/*
 * In classic RCU, call_rcu() is just call_rcu_sched(): head and func
 * are passed through unchanged.
 */
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
        call_rcu_sched(head, func);
}
EXPORT_SYMBOL_GPL(call_rcu);
 930
/*
 * Wait for an rcu-preempt grace period, but make it happen quickly.
 * But because preemptable RCU does not exist, map to rcu-sched by
 * calling synchronize_sched_expedited().
 */
void synchronize_rcu_expedited(void)
{
        synchronize_sched_expedited();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 940
 941 #ifdef CONFIG_HOTPLUG_CPU
 942
 943 /*
 944  * Because preemptable RCU does not exist, there is never any need to
 945  * report on tasks preempted in RCU read-side critical sections during
 946  * expedited RCU grace periods.
 947  */
 948 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 949 {
 950         return;
 951 }
 952
 953 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 954
/*
 * Because preemptable RCU does not exist, it never has any work to do.
 * Always returns 0, whatever cpu is passed.
 */
static int rcu_preempt_pending(int cpu)
{
        return 0;
}
 962
/*
 * Because preemptable RCU does not exist, it never needs any CPU.
 * Always returns 0, whatever cpu is passed.
 */
static int rcu_preempt_needs_cpu(int cpu)
{
        return 0;
}
 970
/*
 * Because preemptable RCU does not exist, rcu_barrier() is just
 * another name for rcu_barrier_sched(), which it calls directly.
 */
void rcu_barrier(void)
{
        rcu_barrier_sched();
}
EXPORT_SYMBOL_GPL(rcu_barrier);
 980
/*
 * Because preemptable RCU does not exist, there is no per-CPU
 * data to initialize.  This stub ignores its cpu argument and does
 * nothing.
 */
static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
{
}
 988
/*
 * Because there is no preemptable RCU, there are no callbacks to move.
 * This stub does nothing.
 */
static void rcu_preempt_send_cbs_to_orphanage(void)
{
}
 995
/*
 * Because preemptable RCU does not exist, it need not be initialized.
 * This stub does nothing.
 */
static void __init __rcu_init_preempt(void)
{
}
1002
1003 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1004
1005 #if !defined(CONFIG_RCU_FAST_NO_HZ)
1006
/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.  This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 *
 * This variant just checks whether this CPU needs any flavor of RCU,
 * by returning the result of rcu_needs_cpu_quick_check().  It does not
 * chew up lots of CPU cycles with preemption disabled in a most-likely
 * vain attempt to cause RCU not to need this CPU.
 *
 * NOTE(review): this comment used to justify the above with "because we
 * have preemptible RCU", but the enclosing preprocessor guard tests only
 * that CONFIG_RCU_FAST_NO_HZ is not defined -- confirm which is intended.
 */
int rcu_needs_cpu(int cpu)
{
        return rcu_needs_cpu_quick_check(cpu);
}
1021
/*
 * Check to see if we need to continue a callback-flush operation to
 * allow the last CPU to enter dyntick-idle mode.  But fast dyntick-idle
 * entry is not configured, so we never do need to, and this stub does
 * nothing.
 */
static void rcu_needs_cpu_flush(void)
{
}
1030
1031 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1032
/* Starting value for the per-CPU rcu_dyntick_drain countdown. */
#define RCU_NEEDS_CPU_FLUSHES 5
/*
 * Per-CPU countdown used by rcu_needs_cpu() to sequence its flush
 * attempts; a value <= 0 means no flush sequence is in progress.
 */
static DEFINE_PER_CPU(int, rcu_dyntick_drain);
/*
 * Per-CPU jiffies value: while jiffies equals it, rcu_needs_cpu() skips
 * the flush attempt and only does the quick check.
 */
static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
1036
/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.  This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 *
 * Because we are not supporting preemptible RCU, attempt to accelerate
 * any current grace periods so that RCU no longer needs this CPU, but
 * only if all other CPUs are already in dynticks-idle mode.  This will
 * allow the CPU cores to be powered down immediately, as opposed to after
 * waiting many milliseconds for grace periods to elapse.
 *
 * Because it is not legal to invoke rcu_process_callbacks() with irqs
 * disabled, we do one pass of force_quiescent_state(), then do a
 * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
 * The per-cpu rcu_dyntick_drain variable controls the sequencing.
 *
 * On the early-exit paths (holdoff period, some other CPU not idle,
 * drain count exhausted) the return value is that of
 * rcu_needs_cpu_quick_check().  Otherwise it is 1 if the rcu_sched or
 * rcu_bh ->nxtlist of this CPU is still non-empty after the push, else 0.
 */
int rcu_needs_cpu(int cpu)
{
        int c = 0;
        int snap;
        int snap_nmi;
        int thatcpu;

        /* Check for being in the holdoff period. */
        if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
                return rcu_needs_cpu_quick_check(cpu);

        /* Don't bother unless we are the last non-dyntick-idle CPU. */
        for_each_online_cpu(thatcpu) {
                if (thatcpu == cpu)
                        continue;
                snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
                snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
                smp_mb(); /* Order sampling of snap with end of grace period. */
                /* An odd counter is treated as "that CPU is not dyntick-idle". */
                if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
                        /*
                         * Abandon any flush sequence in progress.  Holdoff
                         * is set to jiffies - 1 rather than jiffies, so it
                         * will not match the holdoff test above.
                         */
                        per_cpu(rcu_dyntick_drain, cpu) = 0;
                        per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
                        return rcu_needs_cpu_quick_check(cpu);
                }
        }

        /* Check and update the rcu_dyntick_drain sequencing. */
        if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
                /* First time through, initialize the counter. */
                per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
        } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
                /*
                 * We have hit the limit, so time to give up.  Recording
                 * jiffies makes calls during this same jiffy take the
                 * holdoff path above.
                 */
                per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
                return rcu_needs_cpu_quick_check(cpu);
        }

        /* Do one step pushing remaining RCU callbacks through. */
        if (per_cpu(rcu_sched_data, cpu).nxtlist) {
                rcu_sched_qs(cpu);
                force_quiescent_state(&rcu_sched_state, 0);
                /* c becomes 1 if the list is still non-empty after the push. */
                c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
        }
        if (per_cpu(rcu_bh_data, cpu).nxtlist) {
                rcu_bh_qs(cpu);
                force_quiescent_state(&rcu_bh_state, 0);
                c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
        }

        /* If RCU callbacks are still pending, RCU still needs this CPU. */
        if (c)
                raise_softirq(RCU_SOFTIRQ);
        return c;
}
1106
1107 /*
1108  * Check to see if we need to continue a callback-flush operations to
1109  * allow the last CPU to enter dyntick-idle mode.
1110  */
1111 static void rcu_needs_cpu_flush(void)
1112 {
1113         int cpu = smp_processor_id();
1114         unsigned long flags;
1115
1116         if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
1117                 return;
1118         local_irq_save(flags);
1119         (void)rcu_needs_cpu(cpu);
1120         local_irq_restore(flags);
1121 }
1122
1123 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */