/******************************************************************************
 * wait.c
 *
 * Sleep in hypervisor context for some event to occur.
 *
 * Copyright (c) 2010, Keir Fraser <keir@xen.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/sched.h>
#include <xen/softirq.h>
#include <xen/wait.h>
#include <xen/errno.h>

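/*
 * Typical usage is a prepare/check/wait loop, most conveniently via the
 * wait_event() convenience macro declared alongside these interfaces in
 * xen/wait.h.  A minimal caller sketch (the waitqueue and condition names
 * below are purely illustrative):
 *
 *     static struct waitqueue_head my_wq;   // init_waitqueue_head(&my_wq)
 *     ...
 *     wait_event(my_wq, my_condition());    // sleeping side
 *     ...
 *     wake_up_one(&my_wq);                  // waking side
 */
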
struct waitqueue_vcpu {
    struct list_head list;
    struct vcpu *vcpu;
#ifdef CONFIG_X86
    /*
     * Xen/x86 does not have per-vcpu hypervisor stacks. So we must save the
     * hypervisor context before sleeping (descheduling), setjmp/longjmp-style.
     */
    void *esp;    /* stack pointer saved at the point of sleeping (setjmp state) */
    char *stack;  /* one-page buffer holding the saved stack contents */
#endif
};

int init_waitqueue_vcpu(struct vcpu *v)
{
    struct waitqueue_vcpu *wqv;

    wqv = xzalloc(struct waitqueue_vcpu);
    if ( wqv == NULL )
        return -ENOMEM;

#ifdef CONFIG_X86
    wqv->stack = alloc_xenheap_page();
    if ( wqv->stack == NULL )
    {
        xfree(wqv);
        return -ENOMEM;
    }
#endif

    INIT_LIST_HEAD(&wqv->list);
    wqv->vcpu = v;

    v->waitqueue_vcpu = wqv;

    return 0;
}

void destroy_waitqueue_vcpu(struct vcpu *v)
{
    struct waitqueue_vcpu *wqv;

    wqv = v->waitqueue_vcpu;
    if ( wqv == NULL )
        return;

    BUG_ON(!list_empty(&wqv->list));
#ifdef CONFIG_X86
    free_xenheap_page(wqv->stack);
#endif
    xfree(wqv);

    v->waitqueue_vcpu = NULL;
}

void init_waitqueue_head(struct waitqueue_head *wq)
{
    spin_lock_init(&wq->lock);
    INIT_LIST_HEAD(&wq->list);
}

void destroy_waitqueue_head(struct waitqueue_head *wq)
{
    wake_up_all(wq);
}

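/*
 * Each waiter queued by prepare_to_wait() has been paused (vcpu_pause_nosync())
 * and holds a reference on its domain; the vcpu_unpause()/put_domain() pair
 * below undoes that as each waiter is dequeued and woken.
 */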
void wake_up_nr(struct waitqueue_head *wq, unsigned int nr)
{
    struct waitqueue_vcpu *wqv;

    spin_lock(&wq->lock);

    while ( !list_empty(&wq->list) && nr-- )
    {
        wqv = list_entry(wq->list.next, struct waitqueue_vcpu, list);
        list_del_init(&wqv->list);
        vcpu_unpause(wqv->vcpu);
        put_domain(wqv->vcpu->domain);
    }

    spin_unlock(&wq->lock);
}

void wake_up_one(struct waitqueue_head *wq)
{
    wake_up_nr(wq, 1);
}

void wake_up_all(struct waitqueue_head *wq)
{
    wake_up_nr(wq, UINT_MAX);
}

#ifdef CONFIG_X86

static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
    struct cpu_info *cpu_info = get_cpu_info();
    struct vcpu *curr = current;
    unsigned long dummy;
    u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;

    ASSERT(wqv->esp == 0);

    /* Save current VCPU affinity; force wakeup on *this* CPU only. */
    if ( vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT) )
    {
        gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
        domain_crash(curr->domain);

        for ( ; ; )
            do_softirq();
    }

    /* Hand-rolled setjmp(). */
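    /*
     * The asm below saves all GPRs on the live stack, pushes the address of
     * label 2 as the resume point, and copies everything from the current
     * stack pointer up to the top of this CPU's stack (get_cpu_info()) into
     * the one-page wqv->stack buffer, recording the saved stack pointer in
     * wqv->esp.  If the live stack does not fit in one page, the copy is
     * skipped and wqv->esp stays zero, which the error path below catches.
     */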
    asm volatile (
        "push %%rax; push %%rbx; push %%rdx; push %%rbp;"
        "push %%r8;  push %%r9;  push %%r10; push %%r11;"
        "push %%r12; push %%r13; push %%r14; push %%r15;"

        "call 1f;"
        "1: addq $2f-1b,(%%rsp);"
        "sub %%esp,%%ecx;"
        "cmp %3,%%ecx;"
        "ja 3f;"
        "mov %%rsp,%%rsi;"

        /* check_wakeup_from_wait() longjmp()'s to this point. */
        "2: rep movsb;"
        "mov %%rsp,%%rsi;"
        "3: pop %%rax;"

        "pop %%r15; pop %%r14; pop %%r13; pop %%r12;"
        "pop %%r11; pop %%r10; pop %%r9;  pop %%r8;"
        "pop %%rbp; pop %%rdx; pop %%rbx; pop %%rax"
        : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
        : "i" (PAGE_SIZE), "0" (0), "1" (cpu_info), "2" (wqv->stack)
        : "memory" );

    if ( unlikely(wqv->esp == 0) )
    {
        gdprintk(XENLOG_ERR, "Stack too large in %s\n", __func__);
        domain_crash(curr->domain);

        for ( ; ; )
            do_softirq();
    }

    cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
}

static void __finish_wait(struct waitqueue_vcpu *wqv)
{
    wqv->esp = NULL;
    vcpu_temporary_affinity(current, NR_CPUS, VCPU_AFFINITY_WAIT);
}

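/*
 * If the current vcpu previously went to sleep in __prepare_to_wait(), copy
 * its saved stack image back into place and resume execution at the point
 * where it slept; if it never slept (wqv->esp is NULL), return immediately.
 */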
void check_wakeup_from_wait(void)
{
    struct vcpu *curr = current;
    struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;

    ASSERT(list_empty(&wqv->list));

    if ( likely(wqv->esp == NULL) )
        return;

    /* Check if we are still pinned. */
    if ( unlikely(!(curr->affinity_broken & VCPU_AFFINITY_WAIT)) )
    {
        gdprintk(XENLOG_ERR, "vcpu affinity lost\n");
        domain_crash(curr->domain);

        /* Re-initiate scheduler and don't longjmp(). */
        raise_softirq(SCHEDULE_SOFTIRQ);
        for ( ; ; )
            do_softirq();
    }

    /*
     * Hand-rolled longjmp().  Returns to the pointer on the top of
     * wqv->stack, and lands on a `rep movs` instruction.  All other GPRs are
     * restored from the stack, so are available for use here.
     */
    asm volatile (
        "mov %1,%%"__OP"sp; INDIRECT_JMP %[ip]"
        : : "S" (wqv->stack), "D" (wqv->esp),
          "c" ((char *)get_cpu_info() - (char *)wqv->esp),
          [ip] "r" (*(unsigned long *)wqv->stack)
        : "memory" );
    unreachable();
}

#else /* !CONFIG_X86 */

#define __prepare_to_wait(wqv) ((void)0)
#define __finish_wait(wqv) ((void)0)

#endif

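/*
 * Queue the current vcpu on @wq, pause it (asynchronously) and take a
 * reference on its domain.  Callers are expected to re-check their wakeup
 * condition afterwards and to call finish_wait() when done; see the usage
 * sketch near the top of this file.
 */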
void prepare_to_wait(struct waitqueue_head *wq)
{
    struct vcpu *curr = current;
    struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;

    ASSERT_NOT_IN_ATOMIC();
    __prepare_to_wait(wqv);

    ASSERT(list_empty(&wqv->list));
    spin_lock(&wq->lock);
    list_add_tail(&wqv->list, &wq->list);
    vcpu_pause_nosync(curr);
    get_knownalive_domain(curr->domain);
    spin_unlock(&wq->lock);
}

void finish_wait(struct waitqueue_head *wq)
{
    struct vcpu *curr = current;
    struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;

    __finish_wait(wqv);

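    /* Fast path: wake_up_nr() already dequeued and unpaused us. */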
    if ( list_empty(&wqv->list) )
        return;

    spin_lock(&wq->lock);
    if ( !list_empty(&wqv->list) )
    {
        list_del_init(&wqv->list);
        vcpu_unpause(curr);
        put_domain(curr->domain);
    }
    spin_unlock(&wq->lock);
}