/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) IBM Corporation, 2001
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *          Manfred Spraul <manfred@colorfullife.com>
 *
 * Modifications for Xen: Jose Renato Santos
 * Copyright (C) Hewlett-Packard, 2006
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 * Papers:
 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 * http://lse.sourceforge.net/locking/rcupdate.html
 */
#include <xen/types.h>
#include <xen/kernel.h>
#include <xen/init.h>
#include <xen/param.h>
#include <xen/spinlock.h>
#include <xen/smp.h>
#include <xen/rcupdate.h>
#include <xen/sched.h>
#include <asm/atomic.h>
#include <xen/bitops.h>
#include <xen/percpu.h>
#include <xen/softirq.h>
#include <xen/cpu.h>
#include <xen/stop_machine.h>

DEFINE_PER_CPU(unsigned int, rcu_lock_cnt);

/* Global control variables for rcupdate callback mechanism. */
static struct rcu_ctrlblk {
    long cur;          /* Current batch number.                      */
    long completed;    /* Number of the last completed batch         */
    int next_pending;  /* Is the next batch already waiting?         */

    spinlock_t lock __cacheline_aligned;
    cpumask_t cpumask;      /* CPUs that need to switch in order ... */
    cpumask_t idle_cpumask; /* ... unless they are already idle      */
                            /* for current batch to proceed.         */
} __cacheline_aligned rcu_ctrlblk = {
    .cur = -300,
    .completed = -300,
    .lock = SPIN_LOCK_UNLOCKED,
};

/*
 * Per-CPU data for Read-Copy Update.
 * nxtlist - new callbacks are added here
 * curlist - current batch for which quiescent cycle started if any
 */
struct rcu_data {
    /* 1) quiescent state handling : */
    long quiescbatch;    /* Batch # for grace period */
    int qs_pending;      /* core waits for quiesc state */

    /* 2) batch handling */
    long batch;          /* Batch # for current RCU batch */
    struct rcu_head *nxtlist;
    struct rcu_head **nxttail;
    long qlen;           /* # of queued callbacks */
    struct rcu_head *curlist;
    struct rcu_head **curtail;
    struct rcu_head *donelist;
    struct rcu_head **donetail;
    long blimit;         /* Upper limit on a processed batch */
    int cpu;
    long last_rs_qlen;   /* qlen during the last resched */

    /* 3) idle CPUs handling */
    struct timer idle_timer;
    bool idle_timer_active;

    bool process_callbacks;
    bool barrier_active;
};
/*
 * If a CPU with RCU callbacks queued goes idle while the grace period is
 * not finished yet, how can we make sure that the callbacks will eventually
 * be executed? In Linux (2.6.21, the first "tickless idle" Linux kernel),
 * the periodic timer tick would not be stopped for such CPU. Here in Xen,
 * we may not even have a periodic timer tick, so we need to use a
 * special purpose timer.
 *
 * Such timer:
 * 1) is armed only when a CPU with RCU callback(s) queued goes idle
 *    before the end of the current grace period (_not_ for any CPUs that
 *    go idle!);
 * 2) when it fires, it is only re-armed if the grace period is still
 *    running;
 * 3) it is stopped immediately, if the CPU wakes up from idle and
 *    resumes 'normal' execution.
 *
 * About how far in the future the timer should be programmed each time,
 * it's hard to tell (guess!!). Since this mimics Linux's periodic timer
 * tick, take values used there as an indication. In Linux 2.6.21, tick
 * period can be 10ms, 4ms, 3.33ms or 1ms.
 *
 * By default, we use 10ms, to enable at least some power saving on the
 * CPU that is going idle. The user can change this, via a boot time
 * parameter, but only up to 100ms.
 */
#define IDLE_TIMER_PERIOD_MAX     MILLISECS(100)
#define IDLE_TIMER_PERIOD_DEFAULT MILLISECS(10)
#define IDLE_TIMER_PERIOD_MIN     MICROSECS(100)

static s_time_t __read_mostly idle_timer_period;

/*
 * Increment and decrement values for the idle timer handler. The algorithm
 * works as follows:
 * - if the timer actually fires, and it finds out that the grace period isn't
 *   over yet, we add IDLE_TIMER_PERIOD_INCR to the timer's period;
 * - if the timer actually fires and it finds the grace period over, we
 *   subtract IDLE_TIMER_PERIOD_DECR from the timer's period.
 */
#define IDLE_TIMER_PERIOD_INCR    MILLISECS(10)
#define IDLE_TIMER_PERIOD_DECR    MICROSECS(100)
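
/*
 * Illustrative example of the adjustment done in rcu_idle_timer_handler():
 * starting from the default 10ms period, a timer that keeps firing while the
 * grace period is still running grows the period to 20ms, 30ms, ... capped at
 * IDLE_TIMER_PERIOD_MAX (100ms).  Once it fires with the grace period over,
 * the period shrinks again by 100us per firing, never going below
 * IDLE_TIMER_PERIOD_MIN (100us).
 */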

static DEFINE_PER_CPU(struct rcu_data, rcu_data);

static int blimit = 10;
static int qhimark = 10000;
static int qlowmark = 100;
static int rsinterval = 1000;

/*
 * rcu_barrier() handling:
 * Two counters are used to synchronize rcu_barrier() work:
 * - cpu_count holds the number of cpus required to finish barrier handling.
 *   It is decremented by each cpu when it has performed all pending rcu calls.
 * - pending_count shows whether any rcu_barrier() activity is running and
 *   it is used to synchronize leaving rcu_barrier() only after all cpus
 *   have finished their processing. pending_count is initialized to
 *   nr_cpus + 1 and it is decremented by each cpu when it has seen that
 *   cpu_count has reached 0. The cpu where rcu_barrier() has been called will
 *   wait until pending_count has been decremented to 1 (so all cpus have seen
 *   cpu_count reaching 0) and will then set pending_count to 0 indicating
 *   there is no rcu_barrier() running.
 * Cpus are synchronized via the softirq mechanism. rcu_barrier() is regarded
 * as active if pending_count is not zero. In case rcu_barrier() is called on
 * multiple cpus it is enough to check for pending_count being not zero on
 * entry and to call process_pending_softirqs() in a loop until pending_count
 * drops to zero, before starting the new rcu_barrier() processing.
 */
static atomic_t cpu_count = ATOMIC_INIT(0);
static atomic_t pending_count = ATOMIC_INIT(0);
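
/*
 * Illustrative example of the counters above: with 4 online cpus,
 * rcu_barrier() sets pending_count to 5 and cpu_count to 4, and raises
 * RCU_SOFTIRQ everywhere.  Each cpu queues a barrier callback; when that
 * callback runs, cpu_count drops by one (4 -> 3 -> 2 -> 1 -> 0).  Each cpu
 * then observes cpu_count having reached 0 and decrements pending_count
 * (5 -> 4 -> 3 -> 2 -> 1).  The caller waits for pending_count == 1 and
 * finally resets it to 0, marking the barrier as no longer active.
 */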

static void rcu_barrier_callback(struct rcu_head *head)
{
    /*
     * We need a barrier making all previous writes visible to other cpus
     * before doing the atomic_dec(). This would be something like
     * smp_mb__before_atomic() limited to writes, which doesn't exist.
     * So we choose the best alternative available which is smp_wmb()
     * (correct on Arm and only a minor impact on x86, while
     * smp_mb__before_atomic() would be correct on x86, but with a larger
     * impact on Arm).
     */
    smp_wmb();
    atomic_dec(&cpu_count);
}

static void rcu_barrier_action(void)
{
    struct rcu_head head;

    /*
     * When the callback is executed, all previously-queued RCU work on this
     * CPU is completed. When all CPUs have executed their callback, cpu_count
     * will have been decremented to 0.
     */
    call_rcu(&head, rcu_barrier_callback);

    while ( atomic_read(&cpu_count) )
    {
        process_pending_softirqs();
        cpu_relax();
    }

    smp_mb__before_atomic();
    atomic_dec(&pending_count);
}

void rcu_barrier(void)
{
    unsigned int n_cpus;

    ASSERT(!in_irq() && local_irq_is_enabled());

    for ( ; ; )
    {
        if ( !atomic_read(&pending_count) && get_cpu_maps() )
        {
            n_cpus = num_online_cpus();

            if ( atomic_cmpxchg(&pending_count, 0, n_cpus + 1) == 0 )
                break;

            put_cpu_maps();
        }

        process_pending_softirqs();
        cpu_relax();
    }

    atomic_set(&cpu_count, n_cpus);
    cpumask_raise_softirq(&cpu_online_map, RCU_SOFTIRQ);

    while ( atomic_read(&pending_count) != 1 )
    {
        process_pending_softirqs();
        cpu_relax();
    }

    atomic_set(&pending_count, 0);

    put_cpu_maps();
}

/* Is batch a before batch b ? */
static inline int rcu_batch_before(long a, long b)
{
    return (a - b) < 0;
}
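
/*
 * Illustrative note: batch numbers start at -300 (see rcu_ctrlblk above), so
 * e.g. rcu_batch_before(-300, -299) is true because -300 - (-299) == -1 < 0.
 * Expressing "a is before b" via the sign of the difference mirrors the
 * time_before()/time_after() idiom used for jiffies comparisons in Linux.
 */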

static void force_quiescent_state(struct rcu_data *rdp,
                                  struct rcu_ctrlblk *rcp)
{
    cpumask_t cpumask;
    raise_softirq(RCU_SOFTIRQ);
    if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
        rdp->last_rs_qlen = rdp->qlen;
        /*
         * Don't send IPI to itself. With irqs disabled,
         * rdp->cpu is the current cpu.
         */
        cpumask_andnot(&cpumask, &rcp->cpumask, cpumask_of(rdp->cpu));
        cpumask_raise_softirq(&cpumask, RCU_SOFTIRQ);
    }
}

/**
 * call_rcu - Queue an RCU callback for invocation after a grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed. RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
void call_rcu(struct rcu_head *head,
              void (*func)(struct rcu_head *rcu))
{
    unsigned long flags;
    struct rcu_data *rdp;

    head->func = func;
    head->next = NULL;
    local_irq_save(flags);
    rdp = &this_cpu(rcu_data);
    *rdp->nxttail = head;
    rdp->nxttail = &head->next;
    if (unlikely(++rdp->qlen > qhimark)) {
        rdp->blimit = INT_MAX;
        force_quiescent_state(rdp, &rcu_ctrlblk);
    }
    local_irq_restore(flags);
}
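
/*
 * Typical usage sketch (illustrative only; 'struct foo' and its helpers are
 * hypothetical, not part of this file): a structure embeds a struct rcu_head,
 * readers access it under rcu_read_lock(), and the writer unpublishes it and
 * then frees it via call_rcu() once a grace period has elapsed.
 *
 *   struct foo {                         // hypothetical example structure
 *       struct rcu_head rcu;
 *       int data;
 *   };
 *
 *   static void free_foo(struct rcu_head *head)
 *   {
 *       xfree(container_of(head, struct foo, rcu));
 *   }
 *
 *   // Writer side, after removing 'f' from all RCU-visible pointers:
 *   call_rcu(&f->rcu, free_foo);
 */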

/*
 * Invoke the completed RCU callbacks. They are expected to be in
 * a per-cpu list.
 */
static void rcu_do_batch(struct rcu_data *rdp)
{
    struct rcu_head *next, *list;
    int count = 0;

    list = rdp->donelist;
    while (list) {
        next = rdp->donelist = list->next;
        list->func(list);
        list = next;
        rdp->qlen--;
        if (++count >= rdp->blimit)
            break;
    }
    if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
        rdp->blimit = blimit;
    if (!rdp->donelist)
        rdp->donetail = &rdp->donelist;
    else
    {
        rdp->process_callbacks = true;
        raise_softirq(RCU_SOFTIRQ);
    }
}

/*
 * Grace period handling:
 * The grace period handling consists of two steps:
 * - A new grace period is started.
 *   This is done by rcu_start_batch. The start is not broadcast to
 *   all cpus, they must pick this up by comparing rcp->cur with
 *   rdp->quiescbatch. All cpus are recorded in the
 *   rcu_ctrlblk.cpumask bitmap.
 * - All cpus must go through a quiescent state.
 *   Since the start of the grace period is not broadcast, at least two
 *   calls to rcu_check_quiescent_state are required:
 *   The first call just notices that a new grace period is running. The
 *   following calls check if there was a quiescent state since the beginning
 *   of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
 *   the bitmap is empty, then the grace period is completed.
 *   rcu_check_quiescent_state calls rcu_start_batch() to start the next grace
 *   period (if necessary).
 */
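
/*
 * Concrete illustration of the two-step scheme above: suppose rcp->cur is
 * bumped from 5 to 6 while a CPU still has quiescbatch == 5.  On that CPU,
 * the first rcu_check_quiescent_state() call merely records the new period
 * (quiescbatch = 6, qs_pending = 1).  A later call, made once the CPU has
 * passed through a quiescent state, clears qs_pending and calls cpu_quiet(),
 * which removes the CPU from rcp->cpumask; once the mask is empty the batch
 * is marked completed and the next one (if pending) is started.
 */
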
/*
 * Register a new batch of callbacks, and start it up if there is currently no
 * active batch and the batch to be registered has not already occurred.
 * Caller must hold rcu_ctrlblk.lock.
 */
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
{
    if (rcp->next_pending &&
            rcp->completed == rcp->cur) {
        rcp->next_pending = 0;
        /*
         * next_pending == 0 must be visible in
         * __rcu_process_callbacks() before it can see new value of cur.
         */
        smp_wmb();
        rcp->cur++;

        /*
         * Make sure the increment of rcp->cur is visible so that, even if a
         * CPU that is about to go idle is captured inside rcp->cpumask,
         * rcu_pending() will return false, which then means cpu_quiet()
         * will be invoked, before the CPU would actually enter idle.
         *
         * This barrier is paired with the one in rcu_idle_enter().
         */
        smp_mb();
        cpumask_andnot(&rcp->cpumask, &cpu_online_map, &rcp->idle_cpumask);
    }
}

/*
 * cpu went through a quiescent state since the beginning of the grace period.
 * Clear it from the cpu mask and complete the grace period if it was the last
 * cpu. Start another grace period if someone has further entries pending.
 */
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
{
    cpumask_clear_cpu(cpu, &rcp->cpumask);
    if (cpumask_empty(&rcp->cpumask)) {
        /* batch completed ! */
        rcp->completed = rcp->cur;
        rcu_start_batch(rcp);
    }
}

/*
 * Check if the cpu has gone through a quiescent state (say context
 * switch). If so, and if it hasn't already done so in this RCU
 * quiescent cycle, then indicate that it has done so.
 */
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                                      struct rcu_data *rdp)
{
    if (rdp->quiescbatch != rcp->cur) {
        /* start new grace period: */
        rdp->qs_pending = 1;
        rdp->quiescbatch = rcp->cur;
        return;
    }

    /* Grace period already completed for this cpu?
     * qs_pending is checked instead of the actual bitmap to avoid
     * cacheline thrashing.
     */
    if (!rdp->qs_pending)
        return;

    rdp->qs_pending = 0;

    spin_lock(&rcp->lock);
    /*
     * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
     * during cpu startup. Ignore the quiescent state.
     */
    if (likely(rdp->quiescbatch == rcp->cur))
        cpu_quiet(rdp->cpu, rcp);

    spin_unlock(&rcp->lock);
}


/*
 * This does the RCU processing work from softirq context.
 */
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
                                    struct rcu_data *rdp)
{
    if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
        *rdp->donetail = rdp->curlist;
        rdp->donetail = rdp->curtail;
        rdp->curlist = NULL;
        rdp->curtail = &rdp->curlist;
    }

    local_irq_disable();
    if (rdp->nxtlist && !rdp->curlist) {
        rdp->curlist = rdp->nxtlist;
        rdp->curtail = rdp->nxttail;
        rdp->nxtlist = NULL;
        rdp->nxttail = &rdp->nxtlist;
        local_irq_enable();

        /*
         * start the next batch of callbacks
         */

        /* determine batch number */
        rdp->batch = rcp->cur + 1;
        /* see the comment and corresponding wmb() in
         * rcu_start_batch()
         */
        smp_rmb();

        if (!rcp->next_pending) {
            /* and start it/schedule start if it's a new batch */
            spin_lock(&rcp->lock);
            rcp->next_pending = 1;
            rcu_start_batch(rcp);
            spin_unlock(&rcp->lock);
        }
    } else {
        local_irq_enable();
    }
    rcu_check_quiescent_state(rcp, rdp);
    if (rdp->donelist)
        rcu_do_batch(rdp);
}

static void rcu_process_callbacks(void)
{
    struct rcu_data *rdp = &this_cpu(rcu_data);

    if ( rdp->process_callbacks )
    {
        rdp->process_callbacks = false;
        __rcu_process_callbacks(&rcu_ctrlblk, rdp);
    }

    if ( atomic_read(&cpu_count) && !rdp->barrier_active )
    {
        rdp->barrier_active = true;
        rcu_barrier_action();
        rdp->barrier_active = false;
    }
}

static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
    /* This cpu has pending rcu entries and the grace period
     * for them has completed.
     */
    if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
        return 1;

    /* This cpu has no pending entries, but there are new entries */
    if (!rdp->curlist && rdp->nxtlist)
        return 1;

    /* This cpu has finished callbacks to invoke */
    if (rdp->donelist)
        return 1;

    /* The rcu core waits for a quiescent state from the cpu */
    if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
        return 1;

    /* nothing to do */
    return 0;
}

int rcu_pending(int cpu)
{
    return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu));
}

/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so. This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 */
int rcu_needs_cpu(int cpu)
{
    struct rcu_data *rdp = &per_cpu(rcu_data, cpu);

    return (rdp->curlist && !rdp->idle_timer_active) || rcu_pending(cpu);
}

/*
 * Timer for making sure the CPU where a callback is queued does
 * periodically poke rcu_pending(), so that it will invoke the callback
 * not too late after the end of the grace period.
 */
static void rcu_idle_timer_start(void)
{
    struct rcu_data *rdp = &this_cpu(rcu_data);

    /*
     * Note that we don't check rcu_pending() here. In fact, we don't want
     * the timer armed on CPUs that are in the process of quiescing while
     * going idle, unless they really are the ones with a queued callback.
     */
    if (likely(!rdp->curlist))
        return;

    set_timer(&rdp->idle_timer, NOW() + idle_timer_period);
    rdp->idle_timer_active = true;
}

static void rcu_idle_timer_stop(void)
{
    struct rcu_data *rdp = &this_cpu(rcu_data);

    if (likely(!rdp->idle_timer_active))
        return;

    rdp->idle_timer_active = false;

    /*
     * In general, as the CPU is becoming active again, we don't need the
     * idle timer, and so we want to stop it.
     *
     * However, in case we are here because idle_timer has (just) fired and
     * has woken up the CPU, we skip stop_timer() now. In fact, when a CPU
     * wakes up from idle, this code always runs before do_softirq() has the
     * chance to check and deal with TIMER_SOFTIRQ. And if we stop the timer
     * now, the TIMER_SOFTIRQ handler will see it as inactive, and will not
     * call rcu_idle_timer_handler().
     *
     * Therefore, if we see that the timer is expired already, we leave it
     * alone. The TIMER_SOFTIRQ handler will then run the timer routine, and
     * deactivate it.
     */
    if ( !timer_is_expired(&rdp->idle_timer) )
        stop_timer(&rdp->idle_timer);
}

static void rcu_idle_timer_handler(void *data)
{
    perfc_incr(rcu_idle_timer);

    if ( !cpumask_empty(&rcu_ctrlblk.cpumask) )
        idle_timer_period = min(idle_timer_period + IDLE_TIMER_PERIOD_INCR,
                                IDLE_TIMER_PERIOD_MAX);
    else
        idle_timer_period = max(idle_timer_period - IDLE_TIMER_PERIOD_DECR,
                                IDLE_TIMER_PERIOD_MIN);
}

void rcu_check_callbacks(int cpu)
{
    struct rcu_data *rdp = &this_cpu(rcu_data);

    rdp->process_callbacks = true;
    raise_softirq(RCU_SOFTIRQ);
}

static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
                           struct rcu_head **tail)
{
    local_irq_disable();
    *this_rdp->nxttail = list;
    if (list)
        this_rdp->nxttail = tail;
    local_irq_enable();
}

static void rcu_offline_cpu(struct rcu_data *this_rdp,
                            struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
    kill_timer(&rdp->idle_timer);

    /* If the cpu going offline owns the grace period we can block
     * indefinitely waiting for it, so flush it here.
     */
    spin_lock(&rcp->lock);
    if (rcp->cur != rcp->completed)
        cpu_quiet(rdp->cpu, rcp);
    spin_unlock(&rcp->lock);

    rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
    rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
    rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);

    local_irq_disable();
    this_rdp->qlen += rdp->qlen;
    local_irq_enable();
}

static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
                                 struct rcu_data *rdp)
{
    memset(rdp, 0, sizeof(*rdp));
    rdp->curtail = &rdp->curlist;
    rdp->nxttail = &rdp->nxtlist;
    rdp->donetail = &rdp->donelist;
    rdp->quiescbatch = rcp->completed;
    rdp->qs_pending = 0;
    rdp->cpu = cpu;
    rdp->blimit = blimit;
    init_timer(&rdp->idle_timer, rcu_idle_timer_handler, rdp, cpu);
}

static int cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    struct rcu_data *rdp = &per_cpu(rcu_data, cpu);

    switch ( action )
    {
    case CPU_UP_PREPARE:
        rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
        break;
    case CPU_UP_CANCELED:
    case CPU_DEAD:
        rcu_offline_cpu(&this_cpu(rcu_data), &rcu_ctrlblk, rdp);
        break;
    default:
        break;
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};

void __init rcu_init(void)
{
    void *cpu = (void *)(long)smp_processor_id();
    static unsigned int __initdata idle_timer_period_ms =
        IDLE_TIMER_PERIOD_DEFAULT / MILLISECS(1);
    integer_param("rcu-idle-timer-period-ms", idle_timer_period_ms);

    /* We don't allow 0, or anything higher than IDLE_TIMER_PERIOD_MAX */
    if ( idle_timer_period_ms == 0 ||
         idle_timer_period_ms > IDLE_TIMER_PERIOD_MAX / MILLISECS(1) )
    {
        idle_timer_period_ms = IDLE_TIMER_PERIOD_DEFAULT / MILLISECS(1);
        printk("WARNING: rcu-idle-timer-period-ms outside of "
               "(0,%"PRI_stime"]. Resetting it to %u.\n",
               IDLE_TIMER_PERIOD_MAX / MILLISECS(1), idle_timer_period_ms);
    }
    idle_timer_period = MILLISECS(idle_timer_period_ms);

    cpumask_clear(&rcu_ctrlblk.idle_cpumask);
    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
    register_cpu_notifier(&cpu_nfb);
    open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}

/*
 * The CPU is becoming idle, so no more read side critical
 * sections, and one more step toward grace period.
 */
void rcu_idle_enter(unsigned int cpu)
{
    ASSERT(!cpumask_test_cpu(cpu, &rcu_ctrlblk.idle_cpumask));
    cpumask_set_cpu(cpu, &rcu_ctrlblk.idle_cpumask);
    /*
     * If some other CPU is starting a new grace period, we'll notice that
     * by seeing a new value in rcp->cur (different from our quiescbatch).
     * That will force us to go all the way through cpu_quiet(), clearing our
     * bit in rcp->cpumask, even in case we managed to get in there.
     *
     * See the comment before cpumask_andnot() in rcu_start_batch().
     */
    smp_mb();

    rcu_idle_timer_start();
}

void rcu_idle_exit(unsigned int cpu)
{
    rcu_idle_timer_stop();
    ASSERT(cpumask_test_cpu(cpu, &rcu_ctrlblk.idle_cpumask));
    cpumask_clear_cpu(cpu, &rcu_ctrlblk.idle_cpumask);
}