#include <xen/lib.h>
#include <xen/irq.h>
#include <xen/smp.h>
#include <xen/time.h>
#include <xen/spinlock.h>
#include <xen/guest_access.h>
#include <xen/preempt.h>
#include <public/sysctl.h>
#include <asm/processor.h>
#include <asm/atomic.h>

#ifdef CONFIG_DEBUG_LOCKS

static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);

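/*
 * Verify that a lock is only ever used with a consistent IRQ-safety
 * classification; see the comment inside for why mixing the two deadlocks.
 */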
static void check_lock(union lock_debug *debug)
{
    bool irq_safe = !local_irq_is_enabled();

    BUILD_BUG_ON(LOCK_DEBUG_PAD_BITS <= 0);

    if ( unlikely(atomic_read(&spin_debug) <= 0) )
        return;

    /* A few places take liberties with this. */
    /* BUG_ON(in_irq() && !irq_safe); */

    /*
     * We partition locks into IRQ-safe (always held with IRQs disabled) and
     * IRQ-unsafe (always held with IRQs enabled) types. The convention for
     * every lock must be consistently observed else we can deadlock in
     * IRQ-context rendezvous functions (a rendezvous which gets every CPU
     * into IRQ context before any CPU is released from the rendezvous).
     *
     * If we can mix IRQ-disabled and IRQ-enabled callers, the following can
     * happen:
     *  * Lock is held by CPU A, with IRQs enabled
     *  * CPU B is spinning on same lock, with IRQs disabled
     *  * Rendezvous starts -- CPU A takes interrupt and enters rendezvous spin
     *  * DEADLOCK -- CPU B will never enter rendezvous, CPU A will never exit
     *                the rendezvous, and will hence never release the lock.
     *
     * To guard against this subtle bug we latch the IRQ safety of every
     * spinlock in the system, on first use.
     */
    if ( unlikely(debug->irq_safe != irq_safe) )
    {
        union lock_debug seen, new = { 0 };

        new.irq_safe = irq_safe;
        seen.val = cmpxchg(&debug->val, LOCK_DEBUG_INITVAL, new.val);

        if ( !seen.unseen && seen.irq_safe == !irq_safe )
        {
            printk("CHECKLOCK FAILURE: prev irqsafe: %d, curr irqsafe %d\n",
                   seen.irq_safe, irq_safe);
            BUG();
        }
    }
}

static void check_barrier(union lock_debug *debug)
{
    if ( unlikely(atomic_read(&spin_debug) <= 0) )
        return;

    /*
     * For a barrier, we have a relaxed IRQ-safety-consistency check.
     *
     * It is always safe to spin at the barrier with IRQs enabled -- that does
     * not prevent us from entering an IRQ-context rendezvous, and nor are
     * we preventing anyone else from doing so (since we do not actually
     * acquire the lock during a barrier operation).
     *
     * However, if we spin on an IRQ-unsafe lock with IRQs disabled then that
     * is clearly wrong, for the same reason outlined in check_lock() above.
     */
    BUG_ON(!local_irq_is_enabled() && !debug->irq_safe);
}

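/*
 * Ownership tracking: got_lock() records the acquiring CPU in the lock's
 * debug field, and rel_lock() verifies that the releasing CPU matches
 * before clearing it again.
 */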
static void got_lock(union lock_debug *debug)
{
    debug->cpu = smp_processor_id();
}

static void rel_lock(union lock_debug *debug)
{
    if ( atomic_read(&spin_debug) > 0 )
        BUG_ON(debug->cpu != smp_processor_id());
    debug->cpu = SPINLOCK_NO_CPU;
}

void spin_debug_enable(void)
{
    atomic_inc(&spin_debug);
}

void spin_debug_disable(void)
{
    atomic_dec(&spin_debug);
}

#else /* CONFIG_DEBUG_LOCKS */

#define check_lock(l) ((void)0)
#define check_barrier(l) ((void)0)
#define got_lock(l) ((void)0)
#define rel_lock(l) ((void)0)

#endif

#ifdef CONFIG_DEBUG_LOCK_PROFILE

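/*
 * Profiling hooks used by the lock/unlock paths below: account the time a
 * lock is held (LOCK_PROFILE_GOT/REL) and the time spent blocking on it
 * (LOCK_PROFILE_VAR/BLOCK).  They compile to nothing without
 * CONFIG_DEBUG_LOCK_PROFILE.
 */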
#define LOCK_PROFILE_REL                                                     \
    if (lock->profile)                                                       \
    {                                                                        \
        lock->profile->time_hold += NOW() - lock->profile->time_locked;      \
        lock->profile->lock_cnt++;                                           \
    }
#define LOCK_PROFILE_VAR    s_time_t block = 0
#define LOCK_PROFILE_BLOCK  block = block ? : NOW();
#define LOCK_PROFILE_GOT                                                     \
    if (lock->profile)                                                       \
    {                                                                        \
        lock->profile->time_locked = NOW();                                  \
        if (block)                                                           \
        {                                                                    \
            lock->profile->time_block += lock->profile->time_locked - block; \
            lock->profile->block_cnt++;                                      \
        }                                                                    \
    }

#else

#define LOCK_PROFILE_REL
#define LOCK_PROFILE_VAR
#define LOCK_PROFILE_BLOCK
#define LOCK_PROFILE_GOT

#endif

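/*
 * Helpers to read the current ticket state of a lock: observe_lock()
 * returns a snapshot of the whole head/tail pair, observe_head() just the
 * head (the ticket currently being served).
 */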
static always_inline spinlock_tickets_t observe_lock(spinlock_tickets_t *t)
{
    spinlock_tickets_t v;

    smp_rmb();
    v.head_tail = read_atomic(&t->head_tail);
    return v;
}

static always_inline u16 observe_head(spinlock_tickets_t *t)
{
    smp_rmb();
    return read_atomic(&t->head);
}

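/*
 * Ticket lock acquisition.  The caller takes the next ticket (tail) and
 * spins until the head catches up.  While waiting, the optional callback is
 * invoked on every iteration; _spin_lock() passes NULL.
 */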
void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data)
{
    spinlock_tickets_t tickets = SPINLOCK_TICKET_INC;
    LOCK_PROFILE_VAR;

    check_lock(&lock->debug);
    preempt_disable();
    tickets.head_tail = arch_fetch_and_add(&lock->tickets.head_tail,
                                           tickets.head_tail);
    while ( tickets.tail != observe_head(&lock->tickets) )
    {
        LOCK_PROFILE_BLOCK;
        if ( unlikely(cb) )
            cb(data);
        arch_lock_relax();
    }
    got_lock(&lock->debug);
    LOCK_PROFILE_GOT;
    arch_lock_acquire_barrier();
}

void _spin_lock(spinlock_t *lock)
{
    _spin_lock_cb(lock, NULL, NULL);
}

void _spin_lock_irq(spinlock_t *lock)
{
    ASSERT(local_irq_is_enabled());
    local_irq_disable();
    _spin_lock(lock);
}

unsigned long _spin_lock_irqsave(spinlock_t *lock)
{
    unsigned long flags;

    local_irq_save(flags);
    _spin_lock(lock);
    return flags;
}

void _spin_unlock(spinlock_t *lock)
{
    arch_lock_release_barrier();
    LOCK_PROFILE_REL;
    rel_lock(&lock->debug);
    add_sized(&lock->tickets.head, 1);
    arch_lock_signal();
    preempt_enable();
}

void _spin_unlock_irq(spinlock_t *lock)
{
    _spin_unlock(lock);
    local_irq_enable();
}

void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
    _spin_unlock(lock);
    local_irq_restore(flags);
}

int _spin_is_locked(spinlock_t *lock)
{
    check_lock(&lock->debug);

    /*
     * Recursive locks may be locked by another CPU, yet we return
     * "false" here, making this function suitable only for use in
     * ASSERT()s and alike.
     */
    return lock->recurse_cpu == SPINLOCK_NO_CPU
           ? lock->tickets.head != lock->tickets.tail
           : lock->recurse_cpu == smp_processor_id();
}

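/*
 * Try to take the lock without spinning: only succeed if the ticket queue
 * is empty and we win the cmpxchg on the tail.  Returns 1 on success, 0 if
 * the lock is contended.
 */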
int _spin_trylock(spinlock_t *lock)
{
    spinlock_tickets_t old, new;

    check_lock(&lock->debug);
    old = observe_lock(&lock->tickets);
    if ( old.head != old.tail )
        return 0;
    new = old;
    new.tail++;
    preempt_disable();
    if ( cmpxchg(&lock->tickets.head_tail,
                 old.head_tail, new.head_tail) != old.head_tail )
    {
        preempt_enable();
        return 0;
    }
    got_lock(&lock->debug);
#ifdef CONFIG_DEBUG_LOCK_PROFILE
    if (lock->profile)
        lock->profile->time_locked = NOW();
#endif
    /*
     * cmpxchg() is a full barrier so no need for an
     * arch_lock_acquire_barrier().
     */
    return 1;
}

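/*
 * Wait until the holder (if any) observed at the time of the call has
 * released the lock.  The lock itself is never acquired, so later takers
 * may already hold it again by the time we return.
 */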
void _spin_barrier(spinlock_t *lock)
{
    spinlock_tickets_t sample;
#ifdef CONFIG_DEBUG_LOCK_PROFILE
    s_time_t block = NOW();
#endif

    check_barrier(&lock->debug);
    smp_mb();
    sample = observe_lock(&lock->tickets);
    if ( sample.head != sample.tail )
    {
        while ( observe_head(&lock->tickets) == sample.head )
            arch_lock_relax();
#ifdef CONFIG_DEBUG_LOCK_PROFILE
        if ( lock->profile )
        {
            lock->profile->time_block += NOW() - block;
            lock->profile->block_cnt++;
        }
#endif
    }
    smp_mb();
}

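/*
 * Recursive variants: the owning CPU is remembered in recurse_cpu so that
 * nested acquisitions on the same CPU only bump recurse_cnt instead of
 * deadlocking on the ticket lock.
 */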
int _spin_trylock_recursive(spinlock_t *lock)
{
    unsigned int cpu = smp_processor_id();

    /* Don't allow overflow of recurse_cpu field. */
    BUILD_BUG_ON(NR_CPUS > SPINLOCK_NO_CPU);
    BUILD_BUG_ON(SPINLOCK_RECURSE_BITS < 3);

    check_lock(&lock->debug);

    if ( likely(lock->recurse_cpu != cpu) )
    {
        if ( !spin_trylock(lock) )
            return 0;
        lock->recurse_cpu = cpu;
    }

    /* We support only fairly shallow recursion, else the counter overflows. */
    ASSERT(lock->recurse_cnt < SPINLOCK_MAX_RECURSE);
    lock->recurse_cnt++;

    return 1;
}

void _spin_lock_recursive(spinlock_t *lock)
{
    unsigned int cpu = smp_processor_id();

    if ( likely(lock->recurse_cpu != cpu) )
    {
        _spin_lock(lock);
        lock->recurse_cpu = cpu;
    }

    /* We support only fairly shallow recursion, else the counter overflows. */
    ASSERT(lock->recurse_cnt < SPINLOCK_MAX_RECURSE);
    lock->recurse_cnt++;
}

void _spin_unlock_recursive(spinlock_t *lock)
{
    if ( likely(--lock->recurse_cnt == 0) )
    {
        lock->recurse_cpu = SPINLOCK_NO_CPU;
        spin_unlock(lock);
    }
}

#ifdef CONFIG_DEBUG_LOCK_PROFILE

struct lock_profile_anc {
    struct lock_profile_qhead *head_q;   /* first head of this type */
    char                      *name;     /* descriptive string for print */
};

typedef void lock_profile_subfunc(
    struct lock_profile *, int32_t, int32_t, void *);

extern struct lock_profile *__lock_profile_start;
extern struct lock_profile *__lock_profile_end;

static s_time_t lock_profile_start;
static struct lock_profile_anc lock_profile_ancs[LOCKPROF_TYPE_N];
static struct lock_profile_qhead lock_profile_glb_q;
static spinlock_t lock_profile_lock = SPIN_LOCK_UNLOCKED;

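/*
 * Walk all registered profiling entries (per type, per queue head, per
 * lock) and apply 'sub' to each, with the registration list locked.
 */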
static void spinlock_profile_iterate(lock_profile_subfunc *sub, void *par)
{
    int i;
    struct lock_profile_qhead *hq;
    struct lock_profile *eq;

    spin_lock(&lock_profile_lock);
    for ( i = 0; i < LOCKPROF_TYPE_N; i++ )
        for ( hq = lock_profile_ancs[i].head_q; hq; hq = hq->head_q )
            for ( eq = hq->elem_q; eq; eq = eq->next )
                sub(eq, i, hq->idx, par);
    spin_unlock(&lock_profile_lock);
}

static void spinlock_profile_print_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    struct spinlock *lock = data->lock;

    printk("%s ", lock_profile_ancs[type].name);
    if ( type != LOCKPROF_TYPE_GLOBAL )
        printk("%d ", idx);
    printk("%s: addr=%p, lockval=%08x, ", data->name, lock,
           lock->tickets.head_tail);
    if ( lock->debug.cpu == SPINLOCK_NO_CPU )
        printk("not locked\n");
    else
        printk("cpu=%d\n", lock->debug.cpu);
    printk("  lock:%" PRId64 "(%" PRI_stime "), block:%" PRId64 "(%" PRI_stime ")\n",
           data->lock_cnt, data->time_hold, data->block_cnt, data->time_block);
}

void spinlock_profile_printall(unsigned char key)
{
    s_time_t now = NOW();
    s_time_t diff;

    diff = now - lock_profile_start;
    printk("Xen lock profile info SHOW  (now = %"PRI_stime" total = "
           "%"PRI_stime")\n", now, diff);
    spinlock_profile_iterate(spinlock_profile_print_elem, NULL);
}

static void spinlock_profile_reset_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    data->lock_cnt = 0;
    data->block_cnt = 0;
    data->time_hold = 0;
    data->time_block = 0;
}

void spinlock_profile_reset(unsigned char key)
{
    s_time_t now = NOW();

    if ( key != '\0' )
        printk("Xen lock profile info RESET (now = %"PRI_stime")\n", now);
    lock_profile_start = now;
    spinlock_profile_iterate(spinlock_profile_reset_elem, NULL);
}

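/*
 * Context handed to spinlock_profile_ucopy_elem() when copying profiling
 * records out to the guest buffer for XEN_SYSCTL_LOCKPROF_query.
 */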
typedef struct {
    struct xen_sysctl_lockprof_op *pc;
    int                      rc;
} spinlock_profile_ucopy_t;

static void spinlock_profile_ucopy_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    spinlock_profile_ucopy_t *p = par;
    struct xen_sysctl_lockprof_data elem;

    if ( p->rc )
        return;

    if ( p->pc->nr_elem < p->pc->max_elem )
    {
        safe_strcpy(elem.name, data->name);
        elem.type = type;
        elem.idx = idx;
        elem.lock_cnt = data->lock_cnt;
        elem.block_cnt = data->block_cnt;
        elem.lock_time = data->time_hold;
        elem.block_time = data->time_block;
        if ( copy_to_guest_offset(p->pc->data, p->pc->nr_elem, &elem, 1) )
            p->rc = -EFAULT;
    }

    if ( !p->rc )
        p->pc->nr_elem++;
}

/* Dom0 control of lock profiling */
int spinlock_profile_control(struct xen_sysctl_lockprof_op *pc)
{
    int rc = 0;
    spinlock_profile_ucopy_t par;

    switch ( pc->cmd )
    {
    case XEN_SYSCTL_LOCKPROF_reset:
        spinlock_profile_reset('\0');
        break;
    case XEN_SYSCTL_LOCKPROF_query:
        pc->nr_elem = 0;
        par.rc = 0;
        par.pc = pc;
        spinlock_profile_iterate(spinlock_profile_ucopy_elem, &par);
        pc->time = NOW() - lock_profile_start;
        rc = par.rc;
        break;
    default:
        rc = -EINVAL;
        break;
    }

    return rc;
}

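/*
 * Add/remove a dynamically allocated structure's queue of profiled locks
 * to/from the per-type registration list, so that its locks show up in the
 * reports above.
 */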
void _lock_profile_register_struct(
    int32_t type, struct lock_profile_qhead *qhead, int32_t idx, char *name)
{
    qhead->idx = idx;
    spin_lock(&lock_profile_lock);
    qhead->head_q = lock_profile_ancs[type].head_q;
    lock_profile_ancs[type].head_q = qhead;
    lock_profile_ancs[type].name = name;
    spin_unlock(&lock_profile_lock);
}

void _lock_profile_deregister_struct(
    int32_t type, struct lock_profile_qhead *qhead)
{
    struct lock_profile_qhead **q;

    spin_lock(&lock_profile_lock);
    for ( q = &lock_profile_ancs[type].head_q; *q; q = &(*q)->head_q )
    {
        if ( *q == qhead )
        {
            *q = qhead->head_q;
            break;
        }
    }
    spin_unlock(&lock_profile_lock);
}

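/*
 * Collect all statically defined profiled locks (laid out by the linker
 * between __lock_profile_start and __lock_profile_end) into the global
 * queue and register it under LOCKPROF_TYPE_GLOBAL.
 */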
static int __init lock_prof_init(void)
{
    struct lock_profile **q;

    for ( q = &__lock_profile_start; q < &__lock_profile_end; q++ )
    {
        (*q)->next = lock_profile_glb_q.elem_q;
        lock_profile_glb_q.elem_q = *q;
        (*q)->lock->profile = *q;
    }

    _lock_profile_register_struct(
        LOCKPROF_TYPE_GLOBAL, &lock_profile_glb_q,
        0, "Global lock");

    return 0;
}
__initcall(lock_prof_init);

#endif /* CONFIG_DEBUG_LOCK_PROFILE */