/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) IBM Corporation, 2001
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *          Manfred Spraul <manfred@colorfullife.com>
 *
 * Modifications for Xen: Jose Renato Santos
 * Copyright (C) Hewlett-Packard, 2006
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 * Papers:
 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 * http://lse.sourceforge.net/locking/rcupdate.html
 */
#include <xen/types.h>
#include <xen/kernel.h>
#include <xen/init.h>
#include <xen/param.h>
#include <xen/spinlock.h>
#include <xen/smp.h>
#include <xen/rcupdate.h>
#include <xen/sched.h>
#include <asm/atomic.h>
#include <xen/bitops.h>
#include <xen/percpu.h>
#include <xen/softirq.h>
#include <xen/cpu.h>
#include <xen/stop_machine.h>

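/*
 * Per-CPU nesting count of RCU read-side critical sections, maintained by
 * the rcu_read_lock()/rcu_read_unlock() accessors in xen/rcupdate.h and
 * consulted to tell whether this CPU is currently allowed to quiesce.
 */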
DEFINE_PER_CPU(unsigned int, rcu_lock_cnt);

/* Global control variables for rcupdate callback mechanism. */
static struct rcu_ctrlblk {
    long cur;           /* Current batch number.                      */
    long completed;     /* Number of the last completed batch         */
    int  next_pending;  /* Is the next batch already waiting?         */

    spinlock_t  lock __cacheline_aligned;
    cpumask_t   cpumask; /* CPUs that need to switch in order ... */
    cpumask_t   idle_cpumask; /* ... unless they are already idle */
    /* for current batch to proceed.        */
} __cacheline_aligned rcu_ctrlblk = {
    .cur = -300,
    .completed = -300,
    .lock = SPIN_LOCK_UNLOCKED,
};

/*
 * Per-CPU data for Read-Copy Update.
 * nxtlist - new callbacks are added here
 * curlist - current batch for which quiescent cycle started if any
 */
struct rcu_data {
    /* 1) quiescent state handling : */
    long quiescbatch;    /* Batch # for grace period */
    int  qs_pending;     /* core waits for quiesc state */

    /* 2) batch handling */
    long            batch;            /* Batch # for current RCU batch */
    struct rcu_head *nxtlist;
    struct rcu_head **nxttail;
    long            qlen;             /* # of queued callbacks */
    struct rcu_head *curlist;
    struct rcu_head **curtail;
    struct rcu_head *donelist;
    struct rcu_head **donetail;
    long            blimit;           /* Upper limit on a processed batch */
    int cpu;
    long            last_rs_qlen;     /* qlen during the last resched */

    /* 3) idle CPUs handling */
    struct timer idle_timer;
    bool idle_timer_active;

    bool            process_callbacks;
    bool            barrier_active;
};

/*
 * If a CPU with RCU callbacks queued goes idle while the grace period is
 * not finished yet, how can we make sure that the callbacks will eventually
 * be executed? In Linux (2.6.21, the first "tickless idle" Linux kernel),
 * the periodic timer tick would not be stopped for such a CPU. Here in Xen,
 * we may not even have a periodic timer tick, so we need to use a
 * special purpose timer.
 *
 * Such timer:
 * 1) is armed only when a CPU with an RCU callback(s) queued goes idle
 *    before the end of the current grace period (_not_ for just any CPU
 *    that goes idle!);
 * 2) when it fires, it is only re-armed if the grace period is still
 *    running;
 * 3) it is stopped immediately, if the CPU wakes up from idle and
 *    resumes 'normal' execution.
 *
 * How far in the future the timer should be programmed each time is hard
 * to tell (it is basically a guess). Since this mimics Linux's periodic
 * timer tick, take the values used there as an indication. In Linux 2.6.21,
 * the tick period can be 10ms, 4ms, 3.33ms or 1ms.
 *
 * By default, we use 10ms, to enable at least some power saving on the
 * CPU that is going idle. The user can change this, via a boot time
 * parameter, but only up to 100ms.
 */
#define IDLE_TIMER_PERIOD_MAX     MILLISECS(100)
#define IDLE_TIMER_PERIOD_DEFAULT MILLISECS(10)
#define IDLE_TIMER_PERIOD_MIN     MICROSECS(100)

static s_time_t __read_mostly idle_timer_period;

/*
 * Increment and decrement values for the idle timer handler. The algorithm
 * works as follows:
 * - if the timer actually fires, and it finds out that the grace period isn't
 *   over yet, we add IDLE_TIMER_PERIOD_INCR to the timer's period;
 * - if the timer actually fires and it finds the grace period over, we
 *   subtract IDLE_TIMER_PERIOD_DECR from the timer's period.
 */
#define IDLE_TIMER_PERIOD_INCR    MILLISECS(10)
#define IDLE_TIMER_PERIOD_DECR    MICROSECS(100)

static DEFINE_PER_CPU(struct rcu_data, rcu_data);

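/*
 * Tuning knobs for callback processing:
 * - blimit:     default upper bound on callbacks invoked per batch;
 * - qhimark:    if a CPU queues more than this many callbacks, the batch
 *               limit is lifted and a quiescent state is forced;
 * - qlowmark:   once the queue drains below this mark, the default batch
 *               limit is restored;
 * - rsinterval: minimum growth of the queue length before another round
 *               of RCU softirqs is sent to the other CPUs.
 */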
static int blimit = 10;
static int qhimark = 10000;
static int qlowmark = 100;
static int rsinterval = 1000;

/*
 * rcu_barrier() handling:
 * Two counters are used to synchronize rcu_barrier() work:
 * - cpu_count holds the number of cpus required to finish barrier handling.
 *   It is decremented by each cpu when it has performed all pending rcu calls.
 * - pending_count shows whether any rcu_barrier() activity is running and
 *   it is used to synchronize leaving rcu_barrier() only after all cpus
 *   have finished their processing. pending_count is initialized to
 *   nr_cpus + 1 and it is decremented by each cpu when it has seen that
 *   cpu_count has reached 0. The cpu where rcu_barrier() has been called
 *   will wait until pending_count has been decremented to 1 (so all cpus
 *   have seen cpu_count reaching 0) and will then set pending_count to 0
 *   indicating there is no rcu_barrier() running.
 * Cpus are synchronized via the softirq mechanism. rcu_barrier() is regarded
 * as active if pending_count is not zero. In case rcu_barrier() is called on
 * multiple cpus it is enough to check for pending_count being not zero on
 * entry and to call process_pending_softirqs() in a loop until pending_count
 * drops to zero, before starting the new rcu_barrier() processing.
 */
static atomic_t cpu_count = ATOMIC_INIT(0);
static atomic_t pending_count = ATOMIC_INIT(0);

static void rcu_barrier_callback(struct rcu_head *head)
{
    /*
     * We need a barrier making all previous writes visible to other cpus
     * before doing the atomic_dec(). This would be something like
     * smp_mb__before_atomic() limited to writes, which doesn't exist.
     * So we choose the best alternative available, which is smp_wmb()
     * (correct on Arm and only a minor impact on x86, while
     * smp_mb__before_atomic() would be correct on x86, but with a larger
     * impact on Arm).
     */
    smp_wmb();
    atomic_dec(&cpu_count);
}

static void rcu_barrier_action(void)
{
    struct rcu_head head;

    /*
     * When the callback is executed, all previously-queued RCU work on this
     * CPU is completed. When all CPUs have executed their callback, cpu_count
     * will have been decremented to 0.
     */
    call_rcu(&head, rcu_barrier_callback);

    while ( atomic_read(&cpu_count) )
    {
        process_pending_softirqs();
        cpu_relax();
    }

    smp_mb__before_atomic();
    atomic_dec(&pending_count);
}

void rcu_barrier(void)
{
    unsigned int n_cpus;

    ASSERT(!in_irq() && local_irq_is_enabled());

    for ( ; ; )
    {
        if ( !atomic_read(&pending_count) && get_cpu_maps() )
        {
            n_cpus = num_online_cpus();

            if ( atomic_cmpxchg(&pending_count, 0, n_cpus + 1) == 0 )
                break;

            put_cpu_maps();
        }

        process_pending_softirqs();
        cpu_relax();
    }

    atomic_set(&cpu_count, n_cpus);
    cpumask_raise_softirq(&cpu_online_map, RCU_SOFTIRQ);

    while ( atomic_read(&pending_count) != 1 )
    {
        process_pending_softirqs();
        cpu_relax();
    }

    atomic_set(&pending_count, 0);

    put_cpu_maps();
}

/* Is batch a before batch b? */
static inline int rcu_batch_before(long a, long b)
{
    return (a - b) < 0;
}

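/*
 * Ask for a quiescent state to be reached: raise RCU_SOFTIRQ locally and,
 * if the callback queue has grown by more than rsinterval entries since the
 * last time we did so, also on all the other CPUs the current grace period
 * is still waiting for.
 */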
static void force_quiescent_state(struct rcu_data *rdp,
                                  struct rcu_ctrlblk *rcp)
{
    cpumask_t cpumask;
    raise_softirq(RCU_SOFTIRQ);
    if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
        rdp->last_rs_qlen = rdp->qlen;
        /*
         * Don't send IPI to itself. With irqs disabled,
         * rdp->cpu is the current cpu.
         */
        cpumask_andnot(&cpumask, &rcp->cpumask, cpumask_of(rdp->cpu));
        cpumask_raise_softirq(&cpumask, RCU_SOFTIRQ);
    }
}

/**
 * call_rcu - Queue an RCU callback for invocation after a grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
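/*
 * Typical usage (a minimal, illustrative sketch; 'struct foo', 'free_foo'
 * and 'f' are made-up names, not part of this file):
 *
 *     struct foo { ...; struct rcu_head rcu; };
 *
 *     static void free_foo(struct rcu_head *head)
 *     {
 *         xfree(container_of(head, struct foo, rcu));
 *     }
 *
 *     ... after unlinking 'f' so that new readers cannot find it ...
 *     call_rcu(&f->rcu, free_foo);
 */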
void call_rcu(struct rcu_head *head,
              void (*func)(struct rcu_head *rcu))
{
    unsigned long flags;
    struct rcu_data *rdp;

    head->func = func;
    head->next = NULL;
    local_irq_save(flags);
    rdp = &this_cpu(rcu_data);
    *rdp->nxttail = head;
    rdp->nxttail = &head->next;
    if (unlikely(++rdp->qlen > qhimark)) {
        rdp->blimit = INT_MAX;
        force_quiescent_state(rdp, &rcu_ctrlblk);
    }
    local_irq_restore(flags);
}

/*
 * Invoke the completed RCU callbacks. They are expected to be in
 * a per-cpu list.
 */
static void rcu_do_batch(struct rcu_data *rdp)
{
    struct rcu_head *next, *list;
    int count = 0;

    list = rdp->donelist;
    while (list) {
        next = rdp->donelist = list->next;
        list->func(list);
        list = next;
        rdp->qlen--;
        if (++count >= rdp->blimit)
            break;
    }
    if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
        rdp->blimit = blimit;
    if (!rdp->donelist)
        rdp->donetail = &rdp->donelist;
    else
    {
        rdp->process_callbacks = true;
        raise_softirq(RCU_SOFTIRQ);
    }
}

/*
 * Grace period handling:
 * The grace period handling consists of two steps:
 * - A new grace period is started.
 *   This is done by rcu_start_batch. The start is not broadcast to
 *   all cpus, they must pick this up by comparing rcp->cur with
 *   rdp->quiescbatch. All cpus are recorded in the
 *   rcu_ctrlblk.cpumask bitmap.
 * - All cpus must go through a quiescent state.
 *   Since the start of the grace period is not broadcast, at least two
 *   calls to rcu_check_quiescent_state are required:
 *   The first call just notices that a new grace period is running. The
 *   following calls check if there was a quiescent state since the beginning
 *   of the grace period. If so, they update rcu_ctrlblk.cpumask. If
 *   the bitmap is empty, then the grace period is completed.
 *   rcu_check_quiescent_state calls rcu_start_batch() to start the next grace
 *   period (if necessary).
 */
/*
 * Register a new batch of callbacks, and start it up if there is currently no
 * active batch and the batch to be registered has not already occurred.
 * Caller must hold rcu_ctrlblk.lock.
 */
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
{
    if (rcp->next_pending &&
        rcp->completed == rcp->cur) {
        rcp->next_pending = 0;
        /*
         * next_pending == 0 must be visible in
         * __rcu_process_callbacks() before it can see new value of cur.
         */
        smp_wmb();
        rcp->cur++;

        /*
         * Make sure the increment of rcp->cur is visible so that, even if a
         * CPU that is about to go idle is captured inside rcp->cpumask,
         * rcu_pending() will return true, which then means cpu_quiet()
         * will be invoked before the CPU actually enters idle.
         *
         * This barrier is paired with the one in rcu_idle_enter().
         */
        smp_mb();
        cpumask_andnot(&rcp->cpumask, &cpu_online_map, &rcp->idle_cpumask);
    }
}

/*
 * A cpu went through a quiescent state since the beginning of the grace
 * period. Clear it from the cpu mask and complete the grace period if it
 * was the last cpu. Start another grace period if someone has further
 * entries pending.
 */
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
{
    cpumask_clear_cpu(cpu, &rcp->cpumask);
    if (cpumask_empty(&rcp->cpumask)) {
        /* batch completed ! */
        rcp->completed = rcp->cur;
        rcu_start_batch(rcp);
    }
}

/*
 * Check if the cpu has gone through a quiescent state (say context
 * switch). If so, and if it hasn't already done so in this RCU
 * quiescent cycle, then indicate that it has done so.
 */
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                                      struct rcu_data *rdp)
{
    if (rdp->quiescbatch != rcp->cur) {
        /* start new grace period: */
        rdp->qs_pending = 1;
        rdp->quiescbatch = rcp->cur;
        return;
    }

    /* Grace period already completed for this cpu?
     * qs_pending is checked instead of the actual bitmap to avoid
     * cacheline thrashing.
     */
    if (!rdp->qs_pending)
        return;

    rdp->qs_pending = 0;

    spin_lock(&rcp->lock);
    /*
     * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
     * during cpu startup. Ignore the quiescent state.
     */
    if (likely(rdp->quiescbatch == rcp->cur))
        cpu_quiet(rdp->cpu, rcp);

    spin_unlock(&rcp->lock);
}

/*
 * This does the RCU processing work from softirq context.
 */
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
                                    struct rcu_data *rdp)
{
    if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
        *rdp->donetail = rdp->curlist;
        rdp->donetail = rdp->curtail;
        rdp->curlist = NULL;
        rdp->curtail = &rdp->curlist;
    }

    local_irq_disable();
    if (rdp->nxtlist && !rdp->curlist) {
        rdp->curlist = rdp->nxtlist;
        rdp->curtail = rdp->nxttail;
        rdp->nxtlist = NULL;
        rdp->nxttail = &rdp->nxtlist;
        local_irq_enable();

        /*
         * start the next batch of callbacks
         */

        /* determine batch number */
        rdp->batch = rcp->cur + 1;
        /* see the comment and corresponding wmb() in
         * rcu_start_batch()
         */
        smp_rmb();

        if (!rcp->next_pending) {
            /* and start it/schedule start if it's a new batch */
            spin_lock(&rcp->lock);
            rcp->next_pending = 1;
            rcu_start_batch(rcp);
            spin_unlock(&rcp->lock);
        }
    } else {
        local_irq_enable();
    }
    rcu_check_quiescent_state(rcp, rdp);
    if (rdp->donelist)
        rcu_do_batch(rdp);
}

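/*
 * RCU_SOFTIRQ handler: run the per-CPU callback processing if it has been
 * requested, and take part in any rcu_barrier() currently in progress.
 */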
static void rcu_process_callbacks(void)
{
    struct rcu_data *rdp = &this_cpu(rcu_data);

    if ( rdp->process_callbacks )
    {
        rdp->process_callbacks = false;
        __rcu_process_callbacks(&rcu_ctrlblk, rdp);
    }

    if ( atomic_read(&cpu_count) && !rdp->barrier_active )
    {
        rdp->barrier_active = true;
        rcu_barrier_action();
        rdp->barrier_active = false;
    }
}

static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
    /* This cpu has pending rcu entries and the grace period
     * for them has completed.
     */
    if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
        return 1;

    /* This cpu has no pending entries, but there are new entries */
    if (!rdp->curlist && rdp->nxtlist)
        return 1;

    /* This cpu has finished callbacks to invoke */
    if (rdp->donelist)
        return 1;

    /* The rcu core waits for a quiescent state from the cpu */
    if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
        return 1;

    /* nothing to do */
    return 0;
}

int rcu_pending(int cpu)
{
    return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu));
}

/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.  This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 */
int rcu_needs_cpu(int cpu)
{
    struct rcu_data *rdp = &per_cpu(rcu_data, cpu);

    return (rdp->curlist && !rdp->idle_timer_active) || rcu_pending(cpu);
}

/*
 * Timer for making sure the CPU where a callback is queued periodically
 * pokes rcu_pending(), so that it will invoke the callback not too late
 * after the end of the grace period.
 */
static void rcu_idle_timer_start(void)
{
    struct rcu_data *rdp = &this_cpu(rcu_data);

    /*
     * Note that we don't check rcu_pending() here. In fact, we don't want
     * the timer armed on CPUs that are in the process of quiescing while
     * going idle, unless they really are the ones with a queued callback.
     */
    if (likely(!rdp->curlist))
        return;

    set_timer(&rdp->idle_timer, NOW() + idle_timer_period);
    rdp->idle_timer_active = true;
}

static void rcu_idle_timer_stop(void)
{
    struct rcu_data *rdp = &this_cpu(rcu_data);

    if (likely(!rdp->idle_timer_active))
        return;

    rdp->idle_timer_active = false;

    /*
     * In general, as the CPU is becoming active again, we don't need the
     * idle timer, and so we want to stop it.
     *
     * However, in case we are here because idle_timer has (just) fired and
     * has woken up the CPU, we skip stop_timer() now. In fact, when a CPU
     * wakes up from idle, this code always runs before do_softirq() has the
     * chance to check and deal with TIMER_SOFTIRQ. And if we stop the timer
     * now, the TIMER_SOFTIRQ handler will see it as inactive, and will not
     * call rcu_idle_timer_handler().
     *
     * Therefore, if we see that the timer is expired already, we leave it
     * alone. The TIMER_SOFTIRQ handler will then run the timer routine, and
     * deactivate it.
     */
    if ( !timer_is_expired(&rdp->idle_timer) )
        stop_timer(&rdp->idle_timer);
}

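/*
 * Handler for the idle timer. It only adapts the timer's period, following
 * the algorithm described above IDLE_TIMER_PERIOD_INCR: lengthen it while
 * the grace period is still making CPUs wait, shorten it again otherwise.
 * Re-arming, if needed, happens when the CPU goes back to idle, via
 * rcu_idle_timer_start().
 */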
static void rcu_idle_timer_handler(void *data)
{
    perfc_incr(rcu_idle_timer);

    if ( !cpumask_empty(&rcu_ctrlblk.cpumask) )
        idle_timer_period = min(idle_timer_period + IDLE_TIMER_PERIOD_INCR,
                                IDLE_TIMER_PERIOD_MAX);
    else
        idle_timer_period = max(idle_timer_period - IDLE_TIMER_PERIOD_DECR,
                                IDLE_TIMER_PERIOD_MIN);
}

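/*
 * Note that this CPU needs to run the RCU state machine, and defer the
 * actual work to the RCU_SOFTIRQ handler (rcu_process_callbacks()).
 */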
void rcu_check_callbacks(int cpu)
{
    struct rcu_data *rdp = &this_cpu(rcu_data);

    rdp->process_callbacks = true;
    raise_softirq(RCU_SOFTIRQ);
}

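/*
 * Append a (possibly empty) callback list, e.g. one taken over from a CPU
 * that is going offline, to this CPU's nxtlist, so the callbacks are not
 * lost.
 */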
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
                           struct rcu_head **tail)
{
    local_irq_disable();
    *this_rdp->nxttail = list;
    if (list)
        this_rdp->nxttail = tail;
    local_irq_enable();
}

static void rcu_offline_cpu(struct rcu_data *this_rdp,
                            struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
    kill_timer(&rdp->idle_timer);

    /* If the cpu going offline owns the grace period we can block
     * indefinitely waiting for it, so flush it here.
     */
    spin_lock(&rcp->lock);
    if (rcp->cur != rcp->completed)
        cpu_quiet(rdp->cpu, rcp);
    spin_unlock(&rcp->lock);

    rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
    rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
    rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);

    local_irq_disable();
    this_rdp->qlen += rdp->qlen;
    local_irq_enable();
}

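/*
 * (Re)initialize the per-CPU RCU state of a CPU that is coming up: all
 * callback lists empty, and the quiescent state already reported for the
 * last completed batch.
 */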
static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
                                 struct rcu_data *rdp)
{
    memset(rdp, 0, sizeof(*rdp));
    rdp->curtail = &rdp->curlist;
    rdp->nxttail = &rdp->nxtlist;
    rdp->donetail = &rdp->donelist;
    rdp->quiescbatch = rcp->completed;
    rdp->qs_pending = 0;
    rdp->cpu = cpu;
    rdp->blimit = blimit;
    init_timer(&rdp->idle_timer, rcu_idle_timer_handler, rdp, cpu);
}

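/*
 * CPU notifier: prepare the per-CPU data when a CPU comes up, and move any
 * callbacks left behind to the current CPU when a CPU dies or its bring-up
 * is cancelled.
 */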
static int cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    struct rcu_data *rdp = &per_cpu(rcu_data, cpu);

    switch ( action )
    {
    case CPU_UP_PREPARE:
        rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
        break;
    case CPU_UP_CANCELED:
    case CPU_DEAD:
        rcu_offline_cpu(&this_cpu(rcu_data), &rcu_ctrlblk, rdp);
        break;
    default:
        break;
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};

void __init rcu_init(void)
{
    void *cpu = (void *)(long)smp_processor_id();
    static unsigned int __initdata idle_timer_period_ms =
                                    IDLE_TIMER_PERIOD_DEFAULT / MILLISECS(1);
    integer_param("rcu-idle-timer-period-ms", idle_timer_period_ms);

    /* We don't allow 0, or anything higher than IDLE_TIMER_PERIOD_MAX. */
    if ( idle_timer_period_ms == 0 ||
         idle_timer_period_ms > IDLE_TIMER_PERIOD_MAX / MILLISECS(1) )
    {
        idle_timer_period_ms = IDLE_TIMER_PERIOD_DEFAULT / MILLISECS(1);
        printk("WARNING: rcu-idle-timer-period-ms outside of "
               "(0,%"PRI_stime"]. Resetting it to %u.\n",
               IDLE_TIMER_PERIOD_MAX / MILLISECS(1), idle_timer_period_ms);
    }
    idle_timer_period = MILLISECS(idle_timer_period_ms);

    cpumask_clear(&rcu_ctrlblk.idle_cpumask);
    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
    register_cpu_notifier(&cpu_nfb);
    open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}

/*
 * The CPU is becoming idle, so no more read side critical
 * sections, and one more step toward the grace period.
 */
void rcu_idle_enter(unsigned int cpu)
{
    ASSERT(!cpumask_test_cpu(cpu, &rcu_ctrlblk.idle_cpumask));
    cpumask_set_cpu(cpu, &rcu_ctrlblk.idle_cpumask);
    /*
     * If some other CPU is starting a new grace period, we'll notice that
     * by seeing a new value in rcp->cur (different from our quiescbatch).
     * That will force us to go all the way until cpu_quiet(), clearing our
     * bit in rcp->cpumask, even in case we managed to get in there.
     *
     * See the comment before cpumask_andnot() in rcu_start_batch().
     */
    smp_mb();

    rcu_idle_timer_start();
}

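/*
 * The CPU is no longer idle: stop the idle timer (if it was armed) and
 * make sure the CPU is again accounted for when the next grace period's
 * cpumask is computed.
 */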
void rcu_idle_exit(unsigned int cpu)
{
    rcu_idle_timer_stop();
    ASSERT(cpumask_test_cpu(cpu, &rcu_ctrlblk.idle_cpumask));
    cpumask_clear_cpu(cpu, &rcu_ctrlblk.idle_cpumask);
}