/******************************************************************************
 * wait.c
 *
 * Sleep in hypervisor context for some event to occur.
 *
 * Copyright (c) 2010, Keir Fraser <keir@xen.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/sched.h>
#include <xen/softirq.h>
#include <xen/wait.h>
#include <xen/errno.h>

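/*
 * Usage sketch (illustrative only: "event_pending" and "wq" are hypothetical,
 * and real callers normally go through the wait_event()-style wrapper in
 * xen/wait.h rather than open-coding the loop):
 *
 *     Waiter, running in hypervisor context on behalf of a vCPU:
 *         for ( ; ; )
 *         {
 *             prepare_to_wait(&wq);
 *             if ( event_pending )
 *                 break;
 *             raise_softirq(SCHEDULE_SOFTIRQ);
 *             do_softirq();              <- paused vCPU sleeps here
 *         }
 *         finish_wait(&wq);
 *
 *     Waker, once the event has occurred:
 *         event_pending = true;
 *         wake_up_one(&wq);              <- or wake_up_nr()/wake_up_all()
 */
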
struct waitqueue_vcpu {
    struct list_head list;
    struct vcpu *vcpu;
#ifdef CONFIG_X86
    /*
     * Xen/x86 does not have per-vcpu hypervisor stacks. So we must save the
     * hypervisor context before sleeping (descheduling), setjmp/longjmp-style.
     */
    void *esp;
    char *stack;
#endif
};

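/*
 * Allocate the per-vCPU waitqueue state for @v (and, on x86, the xenheap
 * page into which the hypervisor stack is saved while the vCPU sleeps).
 */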
int init_waitqueue_vcpu(struct vcpu *v)
{
    struct waitqueue_vcpu *wqv;

    wqv = xzalloc(struct waitqueue_vcpu);
    if ( wqv == NULL )
        return -ENOMEM;

#ifdef CONFIG_X86
    wqv->stack = alloc_xenheap_page();
    if ( wqv->stack == NULL )
    {
        xfree(wqv);
        return -ENOMEM;
    }
#endif

    INIT_LIST_HEAD(&wqv->list);
    wqv->vcpu = v;

    v->waitqueue_vcpu = wqv;

    return 0;
}

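/*
 * Free the per-vCPU waitqueue state. The vCPU must no longer be queued on
 * any waitqueue at this point.
 */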
void destroy_waitqueue_vcpu(struct vcpu *v)
{
    struct waitqueue_vcpu *wqv;

    wqv = v->waitqueue_vcpu;
    if ( wqv == NULL )
        return;

    BUG_ON(!list_empty(&wqv->list));
#ifdef CONFIG_X86
    free_xenheap_page(wqv->stack);
#endif
    xfree(wqv);

    v->waitqueue_vcpu = NULL;
}

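/* Initialise a waitqueue head: an empty list of waiters protected by a lock. */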
void init_waitqueue_head(struct waitqueue_head *wq)
{
    spin_lock_init(&wq->lock);
    INIT_LIST_HEAD(&wq->list);
}

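/*
 * Tearing down a waitqueue head only requires kicking any remaining waiters;
 * the head itself owns no other resources.
 */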
void destroy_waitqueue_head(struct waitqueue_head *wq)
{
    wake_up_all(wq);
}

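/*
 * Wake up to @nr waiters, oldest first. Each woken vCPU is removed from the
 * queue, unpaused, and the domain reference taken in prepare_to_wait() is
 * dropped.
 */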
void wake_up_nr(struct waitqueue_head *wq, unsigned int nr)
{
    struct waitqueue_vcpu *wqv;

    spin_lock(&wq->lock);

    while ( !list_empty(&wq->list) && nr-- )
    {
        wqv = list_entry(wq->list.next, struct waitqueue_vcpu, list);
        list_del_init(&wqv->list);
        vcpu_unpause(wqv->vcpu);
        put_domain(wqv->vcpu->domain);
    }

    spin_unlock(&wq->lock);
}

void wake_up_one(struct waitqueue_head *wq)
{
    wake_up_nr(wq, 1);
}

void wake_up_all(struct waitqueue_head *wq)
{
    wake_up_nr(wq, UINT_MAX);
}

#ifdef CONFIG_X86

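/*
 * Setjmp-style save: pin the vCPU to this CPU, then copy the live hypervisor
 * stack (from the current stack pointer up to the top-of-stack cpu_info)
 * into wqv->stack and record the stack pointer in wqv->esp. Crashes the
 * domain if the affinity cannot be set or the frame exceeds a page.
 */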
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
    struct cpu_info *cpu_info = get_cpu_info();
    struct vcpu *curr = current;
    unsigned long dummy;
    u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;

    ASSERT(wqv->esp == 0);

    /* Save current VCPU affinity; force wakeup on *this* CPU only. */
    if ( vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT) )
    {
        gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
        domain_crash(curr->domain);

        for ( ; ; )
            do_softirq();
    }

    /* Hand-rolled setjmp(). */
    asm volatile (
        "push %%rax; push %%rbx; push %%rdx; push %%rbp;"
        "push %%r8; push %%r9; push %%r10; push %%r11;"
        "push %%r12; push %%r13; push %%r14; push %%r15;"

        "call 1f;"
        "1: addq $2f-1b,(%%rsp);"
        "sub %%esp,%%ecx;"
        "cmp %3,%%ecx;"
        "ja 3f;"
        "mov %%rsp,%%rsi;"

        /* check_wakeup_from_wait() longjmp()'s to this point. */
        "2: rep movsb;"
        "mov %%rsp,%%rsi;"
        "3: pop %%rax;"

        "pop %%r15; pop %%r14; pop %%r13; pop %%r12;"
        "pop %%r11; pop %%r10; pop %%r9; pop %%r8;"
        "pop %%rbp; pop %%rdx; pop %%rbx; pop %%rax"
        : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
        : "i" (PAGE_SIZE), "0" (0), "1" (cpu_info), "2" (wqv->stack)
        : "memory" );

    if ( unlikely(wqv->esp == 0) )
    {
        gdprintk(XENLOG_ERR, "Stack too large in %s\n", __func__);
        domain_crash(curr->domain);

        for ( ; ; )
            do_softirq();
    }

    cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
}

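/* Discard the saved frame and undo the temporary CPU pinning. */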
static void __finish_wait(struct waitqueue_vcpu *wqv)
{
    wqv->esp = NULL;
    vcpu_temporary_affinity(current, NR_CPUS, VCPU_AFFINITY_WAIT);
}

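/*
 * Executed when a woken vCPU runs again. If it went to sleep via
 * __prepare_to_wait() (wqv->esp is set), copy the saved frame back onto this
 * CPU's hypervisor stack and longjmp into it. If the CPU pinning has been
 * lost in the meantime, crash the domain rather than resume on the wrong
 * stack.
 */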
void check_wakeup_from_wait(void)
{
    struct vcpu *curr = current;
    struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;

    ASSERT(list_empty(&wqv->list));

    if ( likely(wqv->esp == NULL) )
        return;

    /* Check if we are still pinned. */
    if ( unlikely(!(curr->affinity_broken & VCPU_AFFINITY_WAIT)) )
    {
        gdprintk(XENLOG_ERR, "vcpu affinity lost\n");
        domain_crash(curr->domain);

        /* Re-initiate scheduler and don't longjmp(). */
        raise_softirq(SCHEDULE_SOFTIRQ);
        for ( ; ; )
            do_softirq();
    }

    /*
     * Hand-rolled longjmp(). Returns to the pointer on the top of
     * wqv->stack, and lands on a `rep movs` instruction. All other GPRs are
     * restored from the stack, so are available for use here.
     */
    asm volatile (
        "mov %1,%%"__OP"sp; INDIRECT_JMP %[ip]"
        : : "S" (wqv->stack), "D" (wqv->esp),
            "c" ((char *)get_cpu_info() - (char *)wqv->esp),
            [ip] "r" (*(unsigned long *)wqv->stack)
        : "memory" );
    unreachable();
}

#else /* !CONFIG_X86 */

#define __prepare_to_wait(wqv) ((void)0)
#define __finish_wait(wqv) ((void)0)

#endif

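/*
 * Queue the current vCPU on @wq and pause it. Must not be called in atomic
 * context. On return the caller re-checks its wake-up condition and, if it
 * is not yet met, schedules away; a wake_up_*() call on the same queue will
 * unpause the vCPU.
 */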
void prepare_to_wait(struct waitqueue_head *wq)
{
    struct vcpu *curr = current;
    struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;

    ASSERT_NOT_IN_ATOMIC();
    __prepare_to_wait(wqv);

    ASSERT(list_empty(&wqv->list));
    spin_lock(&wq->lock);
    list_add_tail(&wqv->list, &wq->list);
    vcpu_pause_nosync(curr);
    get_knownalive_domain(curr->domain);
    spin_unlock(&wq->lock);
}

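/*
 * Dequeue the current vCPU once its wake-up condition holds. If a waker
 * already removed us from the queue, it also dropped the pause count and
 * domain reference, so there is nothing left to undo; otherwise undo
 * prepare_to_wait() ourselves.
 */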
void finish_wait(struct waitqueue_head *wq)
{
    struct vcpu *curr = current;
    struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;

    __finish_wait(wqv);

    if ( list_empty(&wqv->list) )
        return;

    spin_lock(&wq->lock);
    if ( !list_empty(&wqv->list) )
    {
        list_del_init(&wqv->list);
        vcpu_unpause(curr);
        put_domain(curr->domain);
    }
    spin_unlock(&wq->lock);
}