#include <xen/lib.h>
#include <xen/irq.h>
#include <xen/smp.h>
#include <xen/time.h>
#include <xen/spinlock.h>
#include <xen/guest_access.h>
#include <xen/preempt.h>
#include <public/sysctl.h>
#include <asm/processor.h>
#include <asm/atomic.h>

#ifdef CONFIG_DEBUG_LOCKS

static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);

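/*
 * Latch each lock's IRQ-safety discipline on first use and BUG() if a later
 * caller violates it (see the rationale in the comment inside).
 */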
static void check_lock(union lock_debug *debug)
{
    bool irq_safe = !local_irq_is_enabled();

    BUILD_BUG_ON(LOCK_DEBUG_PAD_BITS <= 0);

    if ( unlikely(atomic_read(&spin_debug) <= 0) )
        return;

    /* A few places take liberties with this. */
    /* BUG_ON(in_irq() && !irq_safe); */

    /*
     * We partition locks into IRQ-safe (always held with IRQs disabled) and
     * IRQ-unsafe (always held with IRQs enabled) types. The convention for
     * every lock must be consistently observed else we can deadlock in
     * IRQ-context rendezvous functions (a rendezvous which gets every CPU
     * into IRQ context before any CPU is released from the rendezvous).
     *
     * If we can mix IRQ-disabled and IRQ-enabled callers, the following can
     * happen:
     *  * Lock is held by CPU A, with IRQs enabled
     *  * CPU B is spinning on same lock, with IRQs disabled
     *  * Rendezvous starts -- CPU A takes interrupt and enters rendezvous spin
     *  * DEADLOCK -- CPU B will never enter rendezvous, CPU A will never exit
     *    the rendezvous, and will hence never release the lock.
     *
     * To guard against this subtle bug we latch the IRQ safety of every
     * spinlock in the system, on first use.
     */
    if ( unlikely(debug->irq_safe != irq_safe) )
    {
        union lock_debug seen, new = { 0 };

        new.irq_safe = irq_safe;
        seen.val = cmpxchg(&debug->val, LOCK_DEBUG_INITVAL, new.val);

        if ( !seen.unseen && seen.irq_safe == !irq_safe )
        {
            printk("CHECKLOCK FAILURE: prev irqsafe: %d, curr irqsafe %d\n",
                   seen.irq_safe, irq_safe);
            BUG();
        }
    }
}

static void check_barrier(union lock_debug *debug)
{
    if ( unlikely(atomic_read(&spin_debug) <= 0) )
        return;

    /*
     * For a barrier, we have a relaxed IRQ-safety-consistency check.
     *
     * It is always safe to spin at the barrier with IRQs enabled -- that does
     * not prevent us from entering an IRQ-context rendezvous, and nor are
     * we preventing anyone else from doing so (since we do not actually
     * acquire the lock during a barrier operation).
     *
     * However, if we spin on an IRQ-unsafe lock with IRQs disabled then that
     * is clearly wrong, for the same reason outlined in check_lock() above.
     */
    BUG_ON(!local_irq_is_enabled() && !debug->irq_safe);
}

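/*
 * Track the lock owner: got_lock() records the acquiring CPU, while
 * rel_lock() checks that the same CPU releases before clearing the owner.
 */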
static void got_lock(union lock_debug *debug)
{
    debug->cpu = smp_processor_id();
}

static void rel_lock(union lock_debug *debug)
{
    if ( atomic_read(&spin_debug) > 0 )
        BUG_ON(debug->cpu != smp_processor_id());
    debug->cpu = SPINLOCK_NO_CPU;
}

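/*
 * spin_debug is a counter rather than a boolean, so nested
 * spin_debug_disable()/spin_debug_enable() pairs behave as expected; the
 * checks above only run while the counter is positive.
 */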
void spin_debug_enable(void)
{
    atomic_inc(&spin_debug);
}

void spin_debug_disable(void)
{
    atomic_dec(&spin_debug);
}

#else /* CONFIG_DEBUG_LOCKS */

#define check_lock(l) ((void)0)
#define check_barrier(l) ((void)0)
#define got_lock(l) ((void)0)
#define rel_lock(l) ((void)0)

#endif

#ifdef CONFIG_DEBUG_LOCK_PROFILE

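/*
 * Helper macros hooked into the lock/unlock paths below: they accumulate the
 * per-lock hold time on release (LOCK_PROFILE_REL), record the time of a
 * successful acquisition (LOCK_PROFILE_GOT) and the time spent spinning
 * (LOCK_PROFILE_BLOCK), provided the lock has a profile attached.
 */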
#define LOCK_PROFILE_REL \
    if (lock->profile) \
    { \
        lock->profile->time_hold += NOW() - lock->profile->time_locked; \
        lock->profile->lock_cnt++; \
    }
#define LOCK_PROFILE_VAR s_time_t block = 0
#define LOCK_PROFILE_BLOCK block = block ? : NOW();
#define LOCK_PROFILE_GOT \
    if (lock->profile) \
    { \
        lock->profile->time_locked = NOW(); \
        if (block) \
        { \
            lock->profile->time_block += lock->profile->time_locked - block; \
            lock->profile->block_cnt++; \
        } \
    }

#else

#define LOCK_PROFILE_REL
#define LOCK_PROFILE_VAR
#define LOCK_PROFILE_BLOCK
#define LOCK_PROFILE_GOT

#endif

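/*
 * Take a snapshot of the ticket pair (or of the head alone), preceded by a
 * read barrier so the spin loops below are ordered against earlier reads.
 */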
static always_inline spinlock_tickets_t observe_lock(spinlock_tickets_t *t)
{
    spinlock_tickets_t v;

    smp_rmb();
    v.head_tail = read_atomic(&t->head_tail);
    return v;
}

static always_inline u16 observe_head(spinlock_tickets_t *t)
{
    smp_rmb();
    return read_atomic(&t->head);
}

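/*
 * Ticket-lock acquisition: atomically take the next ticket (fetch-and-add on
 * the combined head/tail word), then spin until the head catches up with our
 * ticket.  The optional callback is invoked on every iteration of the spin
 * loop, e.g. so a caller can handle pending work while waiting.
 *
 * A minimal sketch of a callback user (hypothetical names, for illustration
 * only):
 *
 *     static void flush_pending(void *arg)
 *     {
 *         struct my_state *st = arg;     // hypothetical per-CPU state
 *         process_deferred_work(st);     // hypothetical helper
 *     }
 *
 *     _spin_lock_cb(&some_lock, flush_pending, &state);
 */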
void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data)
{
    spinlock_tickets_t tickets = SPINLOCK_TICKET_INC;
    LOCK_PROFILE_VAR;

    check_lock(&lock->debug);
    preempt_disable();
    tickets.head_tail = arch_fetch_and_add(&lock->tickets.head_tail,
                                           tickets.head_tail);
    while ( tickets.tail != observe_head(&lock->tickets) )
    {
        LOCK_PROFILE_BLOCK;
        if ( unlikely(cb) )
            cb(data);
        arch_lock_relax();
    }
    got_lock(&lock->debug);
    LOCK_PROFILE_GOT;
    arch_lock_acquire_barrier();
}

void _spin_lock(spinlock_t *lock)
{
    _spin_lock_cb(lock, NULL, NULL);
}

void _spin_lock_irq(spinlock_t *lock)
{
    ASSERT(local_irq_is_enabled());
    local_irq_disable();
    _spin_lock(lock);
}

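/*
 * A hedged usage sketch, assuming the usual spin_lock_irqsave() /
 * spin_unlock_irqrestore() wrapper macros declared alongside these
 * functions:
 *
 *     unsigned long flags;
 *
 *     spin_lock_irqsave(&lock, flags);
 *     ... critical section with IRQs disabled ...
 *     spin_unlock_irqrestore(&lock, flags);
 */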
unsigned long _spin_lock_irqsave(spinlock_t *lock)
{
    unsigned long flags;

    local_irq_save(flags);
    _spin_lock(lock);
    return flags;
}

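/*
 * Release: advance the head ticket so the next waiter (spinning in
 * _spin_lock_cb() above) sees its ticket come up.
 */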
void _spin_unlock(spinlock_t *lock)
{
    arch_lock_release_barrier();
    LOCK_PROFILE_REL;
    rel_lock(&lock->debug);
    add_sized(&lock->tickets.head, 1);
    arch_lock_signal();
    preempt_enable();
}

void _spin_unlock_irq(spinlock_t *lock)
{
    _spin_unlock(lock);
    local_irq_enable();
}

void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
    _spin_unlock(lock);
    local_irq_restore(flags);
}

int _spin_is_locked(spinlock_t *lock)
{
    check_lock(&lock->debug);

    /*
     * Recursive locks may be locked by another CPU, yet we return
     * "false" here, making this function suitable only for use in
     * ASSERT()s and alike.
     */
    return lock->recurse_cpu == SPINLOCK_NO_CPU
           ? lock->tickets.head != lock->tickets.tail
           : lock->recurse_cpu == smp_processor_id();
}

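/*
 * Try-lock: only attempt the cmpxchg if a snapshot shows the lock free
 * (head == tail); otherwise, or if the cmpxchg races with another CPU,
 * return 0 without spinning.
 */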
int _spin_trylock(spinlock_t *lock)
{
    spinlock_tickets_t old, new;

    check_lock(&lock->debug);
    old = observe_lock(&lock->tickets);
    if ( old.head != old.tail )
        return 0;
    new = old;
    new.tail++;
    preempt_disable();
    if ( cmpxchg(&lock->tickets.head_tail,
                 old.head_tail, new.head_tail) != old.head_tail )
    {
        preempt_enable();
        return 0;
    }
    got_lock(&lock->debug);
#ifdef CONFIG_DEBUG_LOCK_PROFILE
    if (lock->profile)
        lock->profile->time_locked = NOW();
#endif
    /*
     * cmpxchg() is a full barrier so no need for an
     * arch_lock_acquire_barrier().
     */
    return 1;
}

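/*
 * Wait until any critical section that was in progress when the lock was
 * sampled has completed, without acquiring the lock ourselves.
 */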
void _spin_barrier(spinlock_t *lock)
{
    spinlock_tickets_t sample;
#ifdef CONFIG_DEBUG_LOCK_PROFILE
    s_time_t block = NOW();
#endif

    check_barrier(&lock->debug);
    smp_mb();
    sample = observe_lock(&lock->tickets);
    if ( sample.head != sample.tail )
    {
        while ( observe_head(&lock->tickets) == sample.head )
            arch_lock_relax();
#ifdef CONFIG_DEBUG_LOCK_PROFILE
        if ( lock->profile )
        {
            lock->profile->time_block += NOW() - block;
            lock->profile->block_cnt++;
        }
#endif
    }
    smp_mb();
}

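/*
 * Recursive variants: the owning CPU is remembered in recurse_cpu, and
 * recurse_cnt counts nested acquisitions so that only the outermost
 * _spin_unlock_recursive() really drops the lock.
 */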
int _spin_trylock_recursive(spinlock_t *lock)
{
    unsigned int cpu = smp_processor_id();

    /* Don't allow overflow of recurse_cpu field. */
    BUILD_BUG_ON(NR_CPUS > SPINLOCK_NO_CPU);
    BUILD_BUG_ON(SPINLOCK_RECURSE_BITS < 3);

    check_lock(&lock->debug);

    if ( likely(lock->recurse_cpu != cpu) )
    {
        if ( !spin_trylock(lock) )
            return 0;
        lock->recurse_cpu = cpu;
    }

    /* We support only fairly shallow recursion, else the counter overflows. */
    ASSERT(lock->recurse_cnt < SPINLOCK_MAX_RECURSE);
    lock->recurse_cnt++;

    return 1;
}

void _spin_lock_recursive(spinlock_t *lock)
{
    unsigned int cpu = smp_processor_id();

    if ( likely(lock->recurse_cpu != cpu) )
    {
        _spin_lock(lock);
        lock->recurse_cpu = cpu;
    }

    /* We support only fairly shallow recursion, else the counter overflows. */
    ASSERT(lock->recurse_cnt < SPINLOCK_MAX_RECURSE);
    lock->recurse_cnt++;
}

void _spin_unlock_recursive(spinlock_t *lock)
{
    if ( likely(--lock->recurse_cnt == 0) )
    {
        lock->recurse_cpu = SPINLOCK_NO_CPU;
        spin_unlock(lock);
    }
}

#ifdef CONFIG_DEBUG_LOCK_PROFILE

struct lock_profile_anc {
    struct lock_profile_qhead *head_q; /* first head of this type */
    char *name;                        /* descriptive string for print */
};

typedef void lock_profile_subfunc(
    struct lock_profile *, int32_t, int32_t, void *);

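/*
 * Bounds of the array of statically initialised lock_profile pointers;
 * these symbols are presumably provided by the linker script around the
 * section into which static lock-profile definitions are placed.
 */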
extern struct lock_profile *__lock_profile_start;
extern struct lock_profile *__lock_profile_end;

static s_time_t lock_profile_start;
static struct lock_profile_anc lock_profile_ancs[LOCKPROF_TYPE_N];
static struct lock_profile_qhead lock_profile_glb_q;
static spinlock_t lock_profile_lock = SPIN_LOCK_UNLOCKED;

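/*
 * Apply 'sub' to every registered lock_profile element, walking all queue
 * heads of every profiling type under lock_profile_lock.
 */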
static void spinlock_profile_iterate(lock_profile_subfunc *sub, void *par)
{
    int i;
    struct lock_profile_qhead *hq;
    struct lock_profile *eq;

    spin_lock(&lock_profile_lock);
    for ( i = 0; i < LOCKPROF_TYPE_N; i++ )
        for ( hq = lock_profile_ancs[i].head_q; hq; hq = hq->head_q )
            for ( eq = hq->elem_q; eq; eq = eq->next )
                sub(eq, i, hq->idx, par);
    spin_unlock(&lock_profile_lock);
}

static void spinlock_profile_print_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    struct spinlock *lock = data->lock;

    printk("%s ", lock_profile_ancs[type].name);
    if ( type != LOCKPROF_TYPE_GLOBAL )
        printk("%d ", idx);
    printk("%s: addr=%p, lockval=%08x, ", data->name, lock,
           lock->tickets.head_tail);
    if ( lock->debug.cpu == SPINLOCK_NO_CPU )
        printk("not locked\n");
    else
        printk("cpu=%d\n", lock->debug.cpu);
    printk(" lock:%" PRId64 "(%" PRI_stime "), block:%" PRId64 "(%" PRI_stime ")\n",
           data->lock_cnt, data->time_hold, data->block_cnt, data->time_block);
}

void spinlock_profile_printall(unsigned char key)
{
    s_time_t now = NOW();
    s_time_t diff;

    diff = now - lock_profile_start;
    printk("Xen lock profile info SHOW (now = %"PRI_stime" total = "
           "%"PRI_stime")\n", now, diff);
    spinlock_profile_iterate(spinlock_profile_print_elem, NULL);
}

static void spinlock_profile_reset_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    data->lock_cnt = 0;
    data->block_cnt = 0;
    data->time_hold = 0;
    data->time_block = 0;
}

void spinlock_profile_reset(unsigned char key)
{
    s_time_t now = NOW();

    if ( key != '\0' )
        printk("Xen lock profile info RESET (now = %"PRI_stime")\n", now);
    lock_profile_start = now;
    spinlock_profile_iterate(spinlock_profile_reset_elem, NULL);
}

typedef struct {
    struct xen_sysctl_lockprof_op *pc;
    int rc;
} spinlock_profile_ucopy_t;

static void spinlock_profile_ucopy_elem(struct lock_profile *data,
    int32_t type, int32_t idx, void *par)
{
    spinlock_profile_ucopy_t *p = par;
    struct xen_sysctl_lockprof_data elem;

    if ( p->rc )
        return;

    if ( p->pc->nr_elem < p->pc->max_elem )
    {
        safe_strcpy(elem.name, data->name);
        elem.type = type;
        elem.idx = idx;
        elem.lock_cnt = data->lock_cnt;
        elem.block_cnt = data->block_cnt;
        elem.lock_time = data->time_hold;
        elem.block_time = data->time_block;
        if ( copy_to_guest_offset(p->pc->data, p->pc->nr_elem, &elem, 1) )
            p->rc = -EFAULT;
    }

    if ( !p->rc )
        p->pc->nr_elem++;
}

/* Dom0 control of lock profiling */
int spinlock_profile_control(struct xen_sysctl_lockprof_op *pc)
{
    int rc = 0;
    spinlock_profile_ucopy_t par;

    switch ( pc->cmd )
    {
    case XEN_SYSCTL_LOCKPROF_reset:
        spinlock_profile_reset('\0');
        break;
    case XEN_SYSCTL_LOCKPROF_query:
        pc->nr_elem = 0;
        par.rc = 0;
        par.pc = pc;
        spinlock_profile_iterate(spinlock_profile_ucopy_elem, &par);
        pc->time = NOW() - lock_profile_start;
        rc = par.rc;
        break;
    default:
        rc = -EINVAL;
        break;
    }

    return rc;
}

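/*
 * Insert a queue head at the front of the per-type list so its locks are
 * included in subsequent iteration (printing, reset, sysctl query).
 */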
void _lock_profile_register_struct(
    int32_t type, struct lock_profile_qhead *qhead, int32_t idx, char *name)
{
    qhead->idx = idx;
    spin_lock(&lock_profile_lock);
    qhead->head_q = lock_profile_ancs[type].head_q;
    lock_profile_ancs[type].head_q = qhead;
    lock_profile_ancs[type].name = name;
    spin_unlock(&lock_profile_lock);
}

void _lock_profile_deregister_struct(
    int32_t type, struct lock_profile_qhead *qhead)
{
    struct lock_profile_qhead **q;

    spin_lock(&lock_profile_lock);
    for ( q = &lock_profile_ancs[type].head_q; *q; q = &(*q)->head_q )
    {
        if ( *q == qhead )
        {
            *q = qhead->head_q;
            break;
        }
    }
    spin_unlock(&lock_profile_lock);
}

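/*
 * Collect all statically defined lock profiles into the global queue and
 * register that queue under LOCKPROF_TYPE_GLOBAL at boot.
 */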
static int __init lock_prof_init(void)
{
    struct lock_profile **q;

    for ( q = &__lock_profile_start; q < &__lock_profile_end; q++ )
    {
        (*q)->next = lock_profile_glb_q.elem_q;
        lock_profile_glb_q.elem_q = *q;
        (*q)->lock->profile = *q;
    }

    _lock_profile_register_struct(
        LOCKPROF_TYPE_GLOBAL, &lock_profile_glb_q,
        0, "Global lock");

    return 0;
}
__initcall(lock_prof_init);

#endif /* CONFIG_DEBUG_LOCK_PROFILE */