1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 */
12 #include <xen/bitops.h>
13 #include <xen/errno.h>
14 #include <xen/grant_table.h>
15 #include <xen/hypercall.h>
16 #include <xen/init.h>
17 #include <xen/lib.h>
18 #include <xen/livepatch.h>
19 #include <xen/sched.h>
20 #include <xen/softirq.h>
21 #include <xen/wait.h>
22
23 #include <asm/alternative.h>
24 #include <asm/cpuerrata.h>
25 #include <asm/cpufeature.h>
26 #include <asm/current.h>
27 #include <asm/event.h>
28 #include <asm/gic.h>
29 #include <asm/guest_access.h>
30 #include <asm/guest_atomics.h>
31 #include <asm/irq.h>
32 #include <asm/p2m.h>
33 #include <asm/platform.h>
34 #include <asm/procinfo.h>
35 #include <asm/regs.h>
36 #include <asm/tee/tee.h>
37 #include <asm/vfp.h>
38 #include <asm/vgic.h>
39 #include <asm/vtimer.h>
40
41 #include "vuart.h"
42
/* Per-pCPU pointer to the vCPU whose register state is currently loaded. */
DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
44
/*
 * Put the calling pCPU into low-power wait (WFI) until an interrupt
 * arrives.  Interrupts are disabled across the haltability check so a
 * wake-up event cannot be lost between the check and the wfi.
 */
static void do_idle(void)
{
    unsigned int cpu = smp_processor_id();

    rcu_idle_enter(cpu);
    /* rcu_idle_enter() can raise TIMER_SOFTIRQ. Process it now. */
    process_pending_softirqs();

    local_irq_disable();
    if ( cpu_is_haltable(cpu) )
    {
        /* Complete outstanding memory accesses before waiting. */
        dsb(sy);
        wfi();
    }
    local_irq_enable();

    rcu_idle_exit(cpu);
}
63
/*
 * Main loop of the idle vCPU: run tasklets and softirqs when there is
 * work, scrub free pages opportunistically, otherwise sleep via
 * do_idle().  Never returns; stops the CPU if it goes offline.
 */
void idle_loop(void)
{
    unsigned int cpu = smp_processor_id();

    for ( ; ; )
    {
        if ( cpu_is_offline(cpu) )
            stop_cpu();

        /* Are we here for running vcpu context tasklets, or for idling? */
        if ( unlikely(tasklet_work_to_do(cpu)) )
        {
            do_tasklet();
            /* Livepatch work is always kicked off via a tasklet. */
            check_for_livepatch_work();
        }
        /*
         * Test softirqs twice --- first to see if should even try scrubbing
         * and then, after it is done, whether softirqs became pending
         * while we were scrubbing.
         */
        else if ( !softirq_pending(cpu) && !scrub_free_pages() &&
                  !softirq_pending(cpu) )
            do_idle();

        do_softirq();
    }
}
92
/*
 * Save the architectural state of vCPU p (P2M, EL1 system registers,
 * arch timer, VFP and GIC state) into p->arch so that another vCPU can
 * be loaded on this pCPU.  The save order mirrors the restore order in
 * ctxt_switch_to().
 */
static void ctxt_switch_from(struct vcpu *p)
{
    /* When the idle VCPU is running, Xen will always stay in hypervisor
     * mode. Therefore we don't need to save the context of an idle VCPU.
     */
    if ( is_idle_vcpu(p) )
        return;

    p2m_save_state(p);

    /* CP 15 */
    p->arch.csselr = READ_SYSREG(CSSELR_EL1);

    /* Control Registers */
    p->arch.cpacr = READ_SYSREG(CPACR_EL1);

    p->arch.contextidr = READ_SYSREG(CONTEXTIDR_EL1);
    p->arch.tpidr_el0 = READ_SYSREG(TPIDR_EL0);
    p->arch.tpidrro_el0 = READ_SYSREG(TPIDRRO_EL0);
    p->arch.tpidr_el1 = READ_SYSREG(TPIDR_EL1);

    /* Arch timer */
    p->arch.cntkctl = READ_SYSREG32(CNTKCTL_EL1);
    virt_timer_save(p);

    /* ThumbEE registers only exist for 32-bit guests on CPUs with ThumbEE. */
    if ( is_32bit_domain(p->domain) && cpu_has_thumbee )
    {
        p->arch.teecr = READ_SYSREG32(TEECR32_EL1);
        p->arch.teehbr = READ_SYSREG32(TEEHBR32_EL1);
    }

#ifdef CONFIG_ARM_32
    p->arch.joscr = READ_CP32(JOSCR);
    p->arch.jmcr = READ_CP32(JMCR);
#endif

    isb();

    /* MMU */
    p->arch.vbar = READ_SYSREG(VBAR_EL1);
    p->arch.ttbcr = READ_SYSREG(TCR_EL1);
    p->arch.ttbr0 = READ_SYSREG64(TTBR0_EL1);
    p->arch.ttbr1 = READ_SYSREG64(TTBR1_EL1);
    if ( is_32bit_domain(p->domain) )
        p->arch.dacr = READ_SYSREG(DACR32_EL2);
    p->arch.par = READ_SYSREG64(PAR_EL1);
#if defined(CONFIG_ARM_32)
    p->arch.mair0 = READ_CP32(MAIR0);
    p->arch.mair1 = READ_CP32(MAIR1);
    p->arch.amair0 = READ_CP32(AMAIR0);
    p->arch.amair1 = READ_CP32(AMAIR1);
#else
    p->arch.mair = READ_SYSREG64(MAIR_EL1);
    p->arch.amair = READ_SYSREG64(AMAIR_EL1);
#endif

    /* Fault Status */
#if defined(CONFIG_ARM_32)
    p->arch.dfar = READ_CP32(DFAR);
    p->arch.ifar = READ_CP32(IFAR);
    p->arch.dfsr = READ_CP32(DFSR);
#elif defined(CONFIG_ARM_64)
    p->arch.far = READ_SYSREG64(FAR_EL1);
    p->arch.esr = READ_SYSREG64(ESR_EL1);
#endif

    if ( is_32bit_domain(p->domain) )
        p->arch.ifsr  = READ_SYSREG(IFSR32_EL2);
    p->arch.afsr0 = READ_SYSREG(AFSR0_EL1);
    p->arch.afsr1 = READ_SYSREG(AFSR1_EL1);

    /* XXX MPU */

    /* VFP */
    vfp_save_state(p);

    /* VGIC */
    gic_save_state(p);

    isb();
}
174
/*
 * Restore the architectural state of vCPU n onto this pCPU.  The restore
 * order is significant: it works around Cortex-A erratum #852523
 * (DACR32_EL2 ordering) and ARM64_WORKAROUND_AT_SPECULATE (P2M restored
 * only after stage-1 MMU sysregs) -- see the inline comments.
 */
static void ctxt_switch_to(struct vcpu *n)
{
    uint32_t vpidr;

    /* When the idle VCPU is running, Xen will always stay in hypervisor
     * mode. Therefore we don't need to restore the context of an idle VCPU.
     */
    if ( is_idle_vcpu(n) )
        return;

    /* Expose the physical CPU id, and the vCPU's virtual MPIDR, to the guest. */
    vpidr = READ_SYSREG32(MIDR_EL1);
    WRITE_SYSREG32(vpidr, VPIDR_EL2);
    WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2);

    /* VGIC */
    gic_restore_state(n);

    /* VFP */
    vfp_restore_state(n);

    /* XXX MPU */

    /* Fault Status */
#if defined(CONFIG_ARM_32)
    WRITE_CP32(n->arch.dfar, DFAR);
    WRITE_CP32(n->arch.ifar, IFAR);
    WRITE_CP32(n->arch.dfsr, DFSR);
#elif defined(CONFIG_ARM_64)
    WRITE_SYSREG64(n->arch.far, FAR_EL1);
    WRITE_SYSREG64(n->arch.esr, ESR_EL1);
#endif

    if ( is_32bit_domain(n->domain) )
        WRITE_SYSREG(n->arch.ifsr, IFSR32_EL2);
    WRITE_SYSREG(n->arch.afsr0, AFSR0_EL1);
    WRITE_SYSREG(n->arch.afsr1, AFSR1_EL1);

    /* MMU */
    WRITE_SYSREG(n->arch.vbar, VBAR_EL1);
    WRITE_SYSREG(n->arch.ttbcr, TCR_EL1);
    WRITE_SYSREG64(n->arch.ttbr0, TTBR0_EL1);
    WRITE_SYSREG64(n->arch.ttbr1, TTBR1_EL1);

    /*
     * Erratum #852523: DACR32_EL2 must be restored before one of the
     * following sysregs: SCTLR_EL1, TCR_EL1, TTBR0_EL1, TTBR1_EL1 or
     * CONTEXTIDR_EL1.
     */
    if ( is_32bit_domain(n->domain) )
        WRITE_SYSREG(n->arch.dacr, DACR32_EL2);
    WRITE_SYSREG64(n->arch.par, PAR_EL1);
#if defined(CONFIG_ARM_32)
    WRITE_CP32(n->arch.mair0, MAIR0);
    WRITE_CP32(n->arch.mair1, MAIR1);
    WRITE_CP32(n->arch.amair0, AMAIR0);
    WRITE_CP32(n->arch.amair1, AMAIR1);
#elif defined(CONFIG_ARM_64)
    WRITE_SYSREG64(n->arch.mair, MAIR_EL1);
    WRITE_SYSREG64(n->arch.amair, AMAIR_EL1);
#endif
    isb();

    /*
     * ARM64_WORKAROUND_AT_SPECULATE: The P2M should be restored after
     * the stage-1 MMU sysregs have been restored.
     */
    p2m_restore_state(n);

    /* Control Registers */
    WRITE_SYSREG(n->arch.cpacr, CPACR_EL1);

    /*
     * This write to sysreg CONTEXTIDR_EL1 ensures we don't hit erratum
     * #852523. I.e DACR32_EL2 is not correctly synchronized.
     */
    WRITE_SYSREG(n->arch.contextidr, CONTEXTIDR_EL1);
    WRITE_SYSREG(n->arch.tpidr_el0, TPIDR_EL0);
    WRITE_SYSREG(n->arch.tpidrro_el0, TPIDRRO_EL0);
    WRITE_SYSREG(n->arch.tpidr_el1, TPIDR_EL1);

    if ( is_32bit_domain(n->domain) && cpu_has_thumbee )
    {
        WRITE_SYSREG32(n->arch.teecr, TEECR32_EL1);
        WRITE_SYSREG32(n->arch.teehbr, TEEHBR32_EL1);
    }

#ifdef CONFIG_ARM_32
    WRITE_CP32(n->arch.joscr, JOSCR);
    WRITE_CP32(n->arch.jmcr, JMCR);
#endif
    isb();

    /* CP 15 */
    WRITE_SYSREG(n->arch.csselr, CSSELR_EL1);

    isb();

    /* This is could trigger an hardware interrupt from the virtual
     * timer. The interrupt needs to be injected into the guest. */
    WRITE_SYSREG32(n->arch.cntkctl, CNTKCTL_EL1);
    virt_timer_restore(n);
}
277
/* Update per-VCPU guest runstate shared memory area (if registered). */
static void update_runstate_area(struct vcpu *v)
{
    void __user *guest_handle = NULL;
    struct vcpu_runstate_info runstate;

    if ( guest_handle_is_null(runstate_guest(v)) )
        return;

    memcpy(&runstate, &v->runstate, sizeof(runstate));

    if ( VM_ASSIST(v->domain, runstate_update_flag) )
    {
        /*
         * Point at the final byte of state_entry_time, which carries the
         * XEN_RUNSTATE_UPDATE flag (top bit, on a little-endian layout --
         * TODO confirm endianness assumption holds for all supported
         * configurations).
         */
        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
        guest_handle--;
        runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
        /* Publish the "update in progress" flag byte to the guest first. */
        __raw_copy_to_guest(guest_handle,
                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
        /* Ensure the flag is visible before the full structure copy below. */
        smp_wmb();
    }

    __copy_to_guest(runstate_guest(v), &runstate, 1);

    if ( guest_handle )
    {
        /* Clear the flag only after the new runstate is fully written. */
        runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
        smp_wmb();
        __raw_copy_to_guest(guest_handle,
                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
    }
}
309
/*
 * Second half of a context switch: save 'prev', restore 'current', then
 * re-enable interrupts (the switch runs with them disabled) and perform
 * the post-switch scheduler/runstate/time bookkeeping.
 */
static void schedule_tail(struct vcpu *prev)
{
    ASSERT(prev != current);

    ctxt_switch_from(prev);

    ctxt_switch_to(current);

    local_irq_enable();

    sched_context_switched(prev, current);

    update_runstate_area(current);

    /* Ensure that the vcpu has an up-to-date time base. */
    update_vcpu_system_time(current);
}
327
/*
 * First function executed in the context of a newly created vCPU (its
 * saved_context.pc is pointed here by arch_vcpu_create()).  Completes
 * the switch away from 'prev' and enters the vCPU at the entry point
 * appropriate for its type; does not return.
 */
static void continue_new_vcpu(struct vcpu *prev)
{
    current->arch.actlr = READ_SYSREG32(ACTLR_EL1);
    processor_vcpu_initialise(current);

    schedule_tail(prev);

    if ( is_idle_vcpu(current) )
        reset_stack_and_jump(idle_loop);
    else if ( is_32bit_domain(current->domain) )
        /* check_wakeup_from_wait(); */
        reset_stack_and_jump(return_to_new_vcpu32);
    else
        /* check_wakeup_from_wait(); */
        reset_stack_and_jump(return_to_new_vcpu64);
}
344
/*
 * Switch this pCPU from running 'prev' to running 'next'.  Entered with
 * interrupts enabled; they are disabled across the low-level switch and
 * re-enabled inside schedule_tail().
 */
void context_switch(struct vcpu *prev, struct vcpu *next)
{
    ASSERT(local_irq_is_enabled());
    ASSERT(prev != next);
    ASSERT(!vcpu_cpu_dirty(next));

    update_runstate_area(prev);

    local_irq_disable();

    set_current(next);

    /* Low-level stack/register switch; returns the outgoing vcpu. */
    prev = __context_switch(prev, next);

    schedule_tail(prev);
}
361
/* Scheduler hook: nothing to refresh when the same vCPU keeps running. */
void continue_running(struct vcpu *same)
{
    /* Nothing to do */
}

/* Arm has no lazy state switching, so there is no local state to sync. */
void sync_local_execstate(void)
{
    /* Nothing to do -- no lazy switching */
}
371
/* Ensure v's saved context, possibly written by a remote pCPU, is visible. */
void sync_vcpu_execstate(struct vcpu *v)
{
    /*
     * We don't support lazy switching.
     *
     * However the context may have been saved from a remote pCPU so we
     * need a barrier to ensure it is observed before continuing.
     *
     * Per vcpu_context_saved(), the context can be observed when
     * v->is_running is false (the caller should check it before calling
     * this function).
     *
     * Note this is a full barrier to also prevent update of the context
     * to happen before it was observed.
     */
    smp_mb();
}
389
/*
 * Consume one character of the format string *(fmt) and fetch the
 * matching variadic argument from 'args': 'i' = unsigned int,
 * 'l' = unsigned long, 'h' = guest handle (pointer).
 *
 * NOTE: jumps to the enclosing function's 'bad_fmt' label on an unknown
 * specifier, so this macro is only usable inside
 * hypercall_create_continuation().
 */
#define NEXT_ARG(fmt, args)                                                 \
({                                                                          \
    unsigned long __arg;                                                    \
    switch ( *(fmt)++ )                                                     \
    {                                                                       \
    case 'i': __arg = (unsigned long)va_arg(args, unsigned int); break;     \
    case 'l': __arg = (unsigned long)va_arg(args, unsigned long); break;    \
    case 'h': __arg = (unsigned long)va_arg(args, void *); break;           \
    default: goto bad_fmt;                                                  \
    }                                                                       \
    __arg;                                                                  \
})
402
hypercall_create_continuation(unsigned int op,const char * format,...)403 unsigned long hypercall_create_continuation(
404 unsigned int op, const char *format, ...)
405 {
406 struct mc_state *mcs = ¤t->mc_state;
407 struct cpu_user_regs *regs;
408 const char *p = format;
409 unsigned long arg, rc;
410 unsigned int i;
411 va_list args;
412
413 current->hcall_preempted = true;
414
415 va_start(args, format);
416
417 if ( mcs->flags & MCSF_in_multicall )
418 {
419 for ( i = 0; *p != '\0'; i++ )
420 mcs->call.args[i] = NEXT_ARG(p, args);
421
422 /* Return value gets written back to mcs->call.result */
423 rc = mcs->call.result;
424 }
425 else
426 {
427 regs = guest_cpu_user_regs();
428
429 #ifdef CONFIG_ARM_64
430 if ( !is_32bit_domain(current->domain) )
431 {
432 regs->x16 = op;
433
434 for ( i = 0; *p != '\0'; i++ )
435 {
436 arg = NEXT_ARG(p, args);
437
438 switch ( i )
439 {
440 case 0: regs->x0 = arg; break;
441 case 1: regs->x1 = arg; break;
442 case 2: regs->x2 = arg; break;
443 case 3: regs->x3 = arg; break;
444 case 4: regs->x4 = arg; break;
445 case 5: regs->x5 = arg; break;
446 }
447 }
448
449 /* Return value gets written back to x0 */
450 rc = regs->x0;
451 }
452 else
453 #endif
454 {
455 regs->r12 = op;
456
457 for ( i = 0; *p != '\0'; i++ )
458 {
459 arg = NEXT_ARG(p, args);
460
461 switch ( i )
462 {
463 case 0: regs->r0 = arg; break;
464 case 1: regs->r1 = arg; break;
465 case 2: regs->r2 = arg; break;
466 case 3: regs->r3 = arg; break;
467 case 4: regs->r4 = arg; break;
468 case 5: regs->r5 = arg; break;
469 }
470 }
471
472 /* Return value gets written back to r0 */
473 rc = regs->r0;
474 }
475 }
476
477 va_end(args);
478
479 return rc;
480
481 bad_fmt:
482 va_end(args);
483 gprintk(XENLOG_ERR, "Bad hypercall continuation format '%c'\n", *p);
484 ASSERT_UNREACHABLE();
485 domain_crash(current->domain);
486 return 0;
487 }
488
489 #undef NEXT_ARG
490
/* Enter the idle loop on a freshly-booted pCPU (must be the idle vCPU). */
void startup_cpu_idle_loop(void)
{
    struct vcpu *v = current;

    ASSERT(is_idle_vcpu(v));
    /* TODO
       cpumask_set_cpu(v->processor, v->domain->dirty_cpumask);
       v->dirty_cpu = v->processor;
    */

    /* Switch to the idle vCPU's own stack; never returns. */
    reset_stack_and_jump(idle_loop);
}
503
alloc_domain_struct(void)504 struct domain *alloc_domain_struct(void)
505 {
506 struct domain *d;
507 BUILD_BUG_ON(sizeof(*d) > PAGE_SIZE);
508 d = alloc_xenheap_pages(0, 0);
509 if ( d == NULL )
510 return NULL;
511
512 clear_page(d);
513 return d;
514 }
515
/* Release a struct domain allocated by alloc_domain_struct(). */
void free_domain_struct(struct domain *d)
{
    free_xenheap_page(d);
}

/* Arch hook for the 'q' debug key; not implemented on Arm. */
void dump_pageframe_info(struct domain *d)
{

}
525
526 /*
527 * The new VGIC has a bigger per-IRQ structure, so we need more than one
528 * page on ARM64. Cowardly increase the limit in this case.
529 */
530 #if defined(CONFIG_NEW_VGIC) && defined(CONFIG_ARM_64)
531 #define MAX_PAGES_PER_VCPU 2
532 #else
533 #define MAX_PAGES_PER_VCPU 1
534 #endif
535
alloc_vcpu_struct(const struct domain * d)536 struct vcpu *alloc_vcpu_struct(const struct domain *d)
537 {
538 struct vcpu *v;
539
540 BUILD_BUG_ON(sizeof(*v) > MAX_PAGES_PER_VCPU * PAGE_SIZE);
541 v = alloc_xenheap_pages(get_order_from_bytes(sizeof(*v)), 0);
542 if ( v != NULL )
543 {
544 unsigned int i;
545
546 for ( i = 0; i < DIV_ROUND_UP(sizeof(*v), PAGE_SIZE); i++ )
547 clear_page((void *)v + i * PAGE_SIZE);
548 }
549
550 return v;
551 }
552
/* Release a struct vcpu allocated by alloc_vcpu_struct(). */
void free_vcpu_struct(struct vcpu *v)
{
    free_xenheap_pages(v, get_order_from_bytes(sizeof(*v)));
}
557
/*
 * Initialise the per-vCPU architectural state: Xen stack, cpu_info
 * area, initial saved context, and (for non-idle vCPUs) guest sysreg
 * defaults plus vGIC and vtimer state.
 *
 * Returns 0 on success, -ENOMEM if the stack cannot be allocated, or
 * the error from vGIC/vtimer init (after tearing down via
 * arch_vcpu_destroy()).
 */
int arch_vcpu_create(struct vcpu *v)
{
    int rc = 0;

    BUILD_BUG_ON( sizeof(struct cpu_info) > STACK_SIZE );

    v->arch.stack = alloc_xenheap_pages(STACK_ORDER, MEMF_node(vcpu_to_node(v)));
    if ( v->arch.stack == NULL )
        return -ENOMEM;

    /* cpu_info lives at the top of the vCPU's hypervisor stack. */
    v->arch.cpu_info = (struct cpu_info *)(v->arch.stack
                                           + STACK_SIZE
                                           - sizeof(struct cpu_info));
    memset(v->arch.cpu_info, 0, sizeof(*v->arch.cpu_info));

    /* First scheduling of this vCPU will enter continue_new_vcpu(). */
    v->arch.saved_context.sp = (register_t)v->arch.cpu_info;
    v->arch.saved_context.pc = (register_t)continue_new_vcpu;

    /* Idle VCPUs don't need the rest of this setup */
    if ( is_idle_vcpu(v) )
        return rc;

    v->arch.sctlr = SCTLR_GUEST_INIT;

    v->arch.vmpidr = MPIDR_SMP | vcpuid_to_vaffinity(v->vcpu_id);

    v->arch.hcr_el2 = get_default_hcr_flags();

    if ( (rc = vcpu_vgic_init(v)) != 0 )
        goto fail;

    if ( (rc = vcpu_vtimer_init(v)) != 0 )
        goto fail;

    /*
     * The workaround 2 (i.e SSBD mitigation) is enabled by default if
     * supported.
     */
    if ( get_ssbd_state() == ARM_SSBD_RUNTIME )
        v->arch.cpu_info->flags |= CPUINFO_WORKAROUND_2_FLAG;

    return rc;

fail:
    arch_vcpu_destroy(v);
    return rc;
}
605
/* Tear down the state set up by arch_vcpu_create(). */
void arch_vcpu_destroy(struct vcpu *v)
{
    vcpu_timer_destroy(v);
    vcpu_vgic_free(v);
    free_xenheap_pages(v->arch.stack, STACK_ORDER);
}

/* Mark the vCPU as executing in AArch64 state (sets HCR_EL2.RW). */
void vcpu_switch_to_aarch64_mode(struct vcpu *v)
{
    v->arch.hcr_el2 |= HCR_RW;
}
617
/*
 * Validate (and canonicalise) a domain-creation configuration from the
 * toolstack.  Resolves XEN_DOMCTL_CONFIG_GIC_NATIVE to the host's GIC
 * version in place.  Returns 0 on success or -EINVAL for any
 * unsupported combination.
 */
int arch_sanitise_domain_config(struct xen_domctl_createdomain *config)
{
    unsigned int max_vcpus;

    /* HVM and HAP must be set. IOMMU may or may not be */
    if ( (config->flags & ~XEN_DOMCTL_CDF_iommu) !=
         (XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap) )
    {
        dprintk(XENLOG_INFO, "Unsupported configuration %#x\n",
                config->flags);
        return -EINVAL;
    }

    /* The P2M table must always be shared between the CPU and the IOMMU */
    if ( config->iommu_opts & XEN_DOMCTL_IOMMU_no_sharept )
    {
        dprintk(XENLOG_INFO,
                "Unsupported iommu option: XEN_DOMCTL_IOMMU_no_sharept\n");
        return -EINVAL;
    }

    /* Fill in the native GIC version, passed back to the toolstack. */
    if ( config->arch.gic_version == XEN_DOMCTL_CONFIG_GIC_NATIVE )
    {
        switch ( gic_hw_version() )
        {
        case GIC_V2:
            config->arch.gic_version = XEN_DOMCTL_CONFIG_GIC_V2;
            break;

        case GIC_V3:
            config->arch.gic_version = XEN_DOMCTL_CONFIG_GIC_V3;
            break;

        default:
            ASSERT_UNREACHABLE();
            return -EINVAL;
        }
    }

    /* max_vcpus depends on the GIC version, and Xen's compiled limit. */
    max_vcpus = min(vgic_max_vcpus(config->arch.gic_version), MAX_VIRT_CPUS);

    /* vgic_max_vcpus() yields 0 for a GIC version it does not support. */
    if ( max_vcpus == 0 )
    {
        dprintk(XENLOG_INFO, "Unsupported GIC version\n");
        return -EINVAL;
    }

    if ( config->max_vcpus > max_vcpus )
    {
        dprintk(XENLOG_INFO, "Requested vCPUs (%u) exceeds max (%u)\n",
                config->max_vcpus, max_vcpus);
        return -EINVAL;
    }

    /* Only the host's TEE type (or none) may be requested. */
    if ( config->arch.tee_type != XEN_DOMCTL_CONFIG_TEE_NONE &&
         config->arch.tee_type != tee_get_type() )
    {
        dprintk(XENLOG_INFO, "Unsupported TEE type\n");
        return -EINVAL;
    }

    return 0;
}
683
/*
 * Initialise the arch-specific parts of a new domain: IOMMU, P2M,
 * shared info page, vGIC, vtimer, TEE mediator and (for the hardware
 * domain) the virtual UART.  On any failure, everything set up so far
 * is torn down via arch_domain_destroy().
 *
 * Returns 0 on success or a negative errno.
 */
int arch_domain_create(struct domain *d,
                       struct xen_domctl_createdomain *config)
{
    int rc, count = 0;

    BUILD_BUG_ON(GUEST_MAX_VCPUS < MAX_VIRT_CPUS);

    /* Idle domains do not need this setup */
    if ( is_idle_domain(d) )
        return 0;

    ASSERT(config != NULL);

    /* p2m_init relies on some value initialized by the IOMMU subsystem */
    if ( (rc = iommu_domain_init(d, config->iommu_opts)) != 0 )
        goto fail;

    if ( (rc = p2m_init(d)) != 0 )
        goto fail;

    rc = -ENOMEM;
    if ( (d->shared_info = alloc_xenheap_pages(0, 0)) == NULL )
        goto fail;

    clear_page(d->shared_info);
    share_xen_page_with_guest(virt_to_page(d->shared_info), d, SHARE_rw);

    /* config was sanitised by arch_sanitise_domain_config(). */
    switch ( config->arch.gic_version )
    {
    case XEN_DOMCTL_CONFIG_GIC_V2:
        d->arch.vgic.version = GIC_V2;
        break;

    case XEN_DOMCTL_CONFIG_GIC_V3:
        d->arch.vgic.version = GIC_V3;
        break;

    default:
        BUG();
    }

    if ( (rc = domain_vgic_register(d, &count)) != 0 )
        goto fail;

    if ( (rc = domain_io_init(d, count + MAX_IO_HANDLER)) != 0 )
        goto fail;

    if ( (rc = domain_vgic_init(d, config->arch.nr_spis)) != 0 )
        goto fail;

    if ( (rc = domain_vtimer_init(d, &config->arch)) != 0 )
        goto fail;

    if ( (rc = tee_domain_init(d, config->arch.tee_type)) != 0 )
        goto fail;

    update_domain_wallclock_time(d);

    /*
     * The hardware domain will get a PPI later in
     * arch/arm/domain_build.c  depending on the
     * interrupt map of the hardware.
     */
    if ( !is_hardware_domain(d) )
    {
        d->arch.evtchn_irq = GUEST_EVTCHN_PPI;
        /* At this stage vgic_reserve_virq should never fail */
        if ( !vgic_reserve_virq(d, GUEST_EVTCHN_PPI) )
            BUG();
    }

    /*
     * Virtual UART is only used by linux early printk and decompress code.
     * Only use it for the hardware domain because the linux kernel may not
     * support multi-platform.
     */
    if ( is_hardware_domain(d) && (rc = domain_vuart_init(d)) )
        goto fail;

    return 0;

fail:
    d->is_dying = DOMDYING_dead;
    arch_domain_destroy(d);

    return rc;
}
771
/* Tear down the arch-specific domain state set up by arch_domain_create(). */
void arch_domain_destroy(struct domain *d)
{
    /* IOMMU page table is shared with P2M, always call
     * iommu_domain_destroy() before p2m_teardown().
     */
    iommu_domain_destroy(d);
    p2m_teardown(d);
    domain_vgic_free(d);
    domain_vuart_free(d);
    free_xenheap_page(d->shared_info);
#ifdef CONFIG_ACPI
    free_xenheap_pages(d->arch.efi_acpi_table,
                       get_order_from_bytes(d->arch.efi_acpi_len));
#endif
    domain_io_free(d);
}
788
/* Arch hook: nothing to do on Arm when a domain is shut down. */
void arch_domain_shutdown(struct domain *d)
{
}

/* Arch hook: nothing to do on Arm when a domain is paused. */
void arch_domain_pause(struct domain *d)
{
}

/* Arch hook: nothing to do on Arm when a domain is unpaused. */
void arch_domain_unpause(struct domain *d)
{
}

/* Soft reset is not implemented on Arm. */
int arch_domain_soft_reset(struct domain *d)
{
    return -ENOSYS;
}
805
/* Arch hook invoked once domain construction has completed. */
void arch_domain_creation_finished(struct domain *d)
{
    /*
     * To avoid flushing the whole guest RAM on the first Set/Way, we
     * invalidate the P2M to track what has been accessed.
     *
     * This is only done when the IOMMU is not used or the page-tables
     * are not shared, because clearing bit[0] (i.e. the valid bit)
     * would otherwise cause IOMMU faults that cannot be fixed up.
     */
    if ( !iommu_use_hap_pt(d) )
        p2m_invalidate_root(p2m_get_hostp2m(d));
}
819
/* Return 1 iff 'psr' names a processor mode a 32-bit guest may use. */
static int is_guest_pv32_psr(uint32_t psr)
{
    switch ( psr & PSR_MODE_MASK )
    {
    /* Monitor and Hyp modes are reserved for firmware/hypervisor. */
    case PSR_MODE_MON:
    case PSR_MODE_HYP:
        return 0;
    case PSR_MODE_USR:
    case PSR_MODE_FIQ:
    case PSR_MODE_IRQ:
    case PSR_MODE_SVC:
    case PSR_MODE_ABT:
    case PSR_MODE_UND:
    case PSR_MODE_SYS:
        return 1;
    default:
        return 0;
    }
}
838
839
#ifdef CONFIG_ARM_64
/* Return 1 iff 'psr' names an exception level a 64-bit guest may use. */
static int is_guest_pv64_psr(uint32_t psr)
{
    /* AArch32 state (PSR_MODE_BIT set) is never valid for a 64-bit guest. */
    if ( psr & PSR_MODE_BIT )
        return 0;

    switch ( psr & PSR_MODE_MASK )
    {
    /* EL2/EL3 are reserved for the hypervisor/firmware. */
    case PSR_MODE_EL3h:
    case PSR_MODE_EL3t:
    case PSR_MODE_EL2h:
    case PSR_MODE_EL2t:
        return 0;
    case PSR_MODE_EL1h:
    case PSR_MODE_EL1t:
    case PSR_MODE_EL0t:
        return 1;
    default:
        return 0;
    }
}
#endif
861
862 /*
863 * Initialise VCPU state. The context can be supplied by either the
864 * toolstack (XEN_DOMCTL_setvcpucontext) or the guest
865 * (VCPUOP_initialise) and therefore must be properly validated.
866 */
int arch_set_info_guest(
    struct vcpu *v, vcpu_guest_context_u c)
{
    struct vcpu_guest_context *ctxt = c.nat;
    struct vcpu_guest_core_regs *regs = &c.nat->user_regs;

    /*
     * Reject any PSR (current or banked SPSR) naming a privilege level
     * the guest must not run at; a zero SPSR is treated as "unset" and
     * accepted.
     */
    if ( is_32bit_domain(v->domain) )
    {
        if ( !is_guest_pv32_psr(regs->cpsr) )
            return -EINVAL;

        if ( regs->spsr_svc && !is_guest_pv32_psr(regs->spsr_svc) )
            return -EINVAL;
        if ( regs->spsr_abt && !is_guest_pv32_psr(regs->spsr_abt) )
            return -EINVAL;
        if ( regs->spsr_und && !is_guest_pv32_psr(regs->spsr_und) )
            return -EINVAL;
        if ( regs->spsr_irq && !is_guest_pv32_psr(regs->spsr_irq) )
            return -EINVAL;
        if ( regs->spsr_fiq && !is_guest_pv32_psr(regs->spsr_fiq) )
            return -EINVAL;
    }
#ifdef CONFIG_ARM_64
    else
    {
        if ( !is_guest_pv64_psr(regs->cpsr) )
            return -EINVAL;

        if ( regs->spsr_el1 && !is_guest_pv64_psr(regs->spsr_el1) )
            return -EINVAL;
    }
#endif

    vcpu_regs_user_to_hyp(v, regs);

    /* Seed the guest's initial MMU configuration from the context. */
    v->arch.sctlr = ctxt->sctlr;
    v->arch.ttbr0 = ctxt->ttbr0;
    v->arch.ttbr1 = ctxt->ttbr1;
    v->arch.ttbcr = ctxt->ttbcr;

    v->is_initialised = 1;

    /* VGCF_online controls whether the vCPU starts runnable or down. */
    if ( ctxt->flags & VGCF_online )
        clear_bit(_VPF_down, &v->pause_flags);
    else
        set_bit(_VPF_down, &v->pause_flags);

    return 0;
}
916
/* VCPUOP_initialise: Arm uses the common default implementation. */
int arch_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    return default_initialise_vcpu(v, arg);
}

/* Arch hook run when a vCPU is reset. */
int arch_vcpu_reset(struct vcpu *v)
{
    vcpu_end_shutdown_deferral(v);
    return 0;
}
927
/*
 * Drop the allocation reference on every page in 'list', freeing pages
 * back to the heap as their last reference goes away.  Returns 0 when
 * the list has been fully processed, or -ERESTART if a hypercall
 * continuation is needed.
 */
static int relinquish_memory(struct domain *d, struct page_list_head *list)
{
    struct page_info *page, *tmp;
    int ret = 0;

    /* Use a recursive lock, as we may enter 'free_domheap_page'. */
    spin_lock_recursive(&d->page_alloc_lock);

    page_list_for_each_safe( page, tmp, list )
    {
        /* Grab a reference to the page so it won't disappear from under us. */
        if ( unlikely(!get_page(page, d)) )
            /*
             * Couldn't get a reference -- someone is freeing this page and
             * has already committed to doing so, so no more to do here.
             *
             * Note that the page must be left on the list, a list_del
             * here will clash with the list_del done by the other
             * party in the race and corrupt the list head.
             */
            continue;

        put_page_alloc_ref(page);
        put_page(page);

        if ( hypercall_preempt_check() )
        {
            ret = -ERESTART;
            goto out;
        }
    }

  out:
    spin_unlock_recursive(&d->page_alloc_lock);
    return ret;
}
964
965 /*
966 * Record the current progress. Subsequent hypercall continuations will
967 * logically restart work from this point.
968 *
969 * PROGRESS() markers must not be in the middle of loops. The loop
970 * variable isn't preserved accross a continuation.
971 *
972 * To avoid redundant work, there should be a marker before each
973 * function which may return -ERESTART.
974 */
/* Relinquish progress markers, persisted in d->arch.rel_priv. */
enum {
    PROG_tee = 1,
    PROG_xen,
    PROG_page,
    PROG_mapping,
    PROG_done,
};

/* Record progress, then fall through into the matching case label. */
#define PROGRESS(x)                             \
    d->arch.rel_priv = PROG_ ## x;              \
    /* Fallthrough */                           \
    case PROG_ ## x
987
domain_relinquish_resources(struct domain * d)988 int domain_relinquish_resources(struct domain *d)
989 {
990 int ret = 0;
991
992 /*
993 * This hypercall can take minutes of wallclock time to complete. This
994 * logic implements a co-routine, stashing state in struct domain across
995 * hypercall continuation boundaries.
996 */
997 switch ( d->arch.rel_priv )
998 {
999 case 0:
1000 ret = iommu_release_dt_devices(d);
1001 if ( ret )
1002 return ret;
1003
1004 /*
1005 * Release the resources allocated for vpl011 which were
1006 * allocated via a DOMCTL call XEN_DOMCTL_vuart_op.
1007 */
1008 domain_vpl011_deinit(d);
1009
1010 PROGRESS(tee):
1011 ret = tee_relinquish_resources(d);
1012 if (ret )
1013 return ret;
1014
1015 PROGRESS(xen):
1016 ret = relinquish_memory(d, &d->xenpage_list);
1017 if ( ret )
1018 return ret;
1019
1020 PROGRESS(page):
1021 ret = relinquish_memory(d, &d->page_list);
1022 if ( ret )
1023 return ret;
1024
1025 PROGRESS(mapping):
1026 ret = relinquish_p2m_mapping(d);
1027 if ( ret )
1028 return ret;
1029
1030 PROGRESS(done):
1031 break;
1032
1033 default:
1034 BUG();
1035 }
1036
1037 return 0;
1038 }
1039
1040 #undef PROGRESS
1041
/* Dump arch-specific (P2M) information about domain d to the console. */
void arch_dump_domain_info(struct domain *d)
{
    p2m_dump_info(d);
}
1046
1047
/*
 * Arm front-end for the vcpu_op hypercall: only the vcpu-info and
 * runstate registration sub-ops are supported; everything else is
 * rejected with -EINVAL.
 */
long do_arm_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    if ( cmd == VCPUOP_register_vcpu_info ||
         cmd == VCPUOP_register_runstate_memory_area )
        return do_vcpu_op(cmd, vcpuid, arg);

    return -EINVAL;
}
1059
/* No arch-specific vcpu_op sub-ops are implemented on Arm. */
long arch_do_vcpu_op(int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    return -ENOSYS;
}

/* Dump GIC and vGIC state for vCPU v to the console. */
void arch_dump_vcpu_info(struct vcpu *v)
{
    gic_dump_info(v);
    gic_dump_vgic_info(v);
}
1070
/*
 * Mark an event-channel upcall pending for v and, if it was not already
 * pending, inject the domain's event-channel interrupt into the guest.
 */
void vcpu_mark_events_pending(struct vcpu *v)
{
    bool already_pending = guest_test_and_set_bit(v->domain,
        0, (unsigned long *)&vcpu_info(v, evtchn_upcall_pending));

    if ( already_pending )
        return;

    vgic_inject_irq(v->domain, v, v->domain->arch.evtchn_irq, true);
}

/* Re-sync the event-channel interrupt level with evtchn_upcall_pending. */
void vcpu_update_evtchn_irq(struct vcpu *v)
{
    bool pending = vcpu_info(v, evtchn_upcall_pending);

    vgic_inject_irq(v->domain, v, v->domain->arch.evtchn_irq, pending);
}
1088
/* The ARM spec declares that even if local irqs are masked in
 * the CPSR register, an irq should wake up a cpu from WFI anyway.
 * For this reason we need to check for irqs that need delivery,
 * ignoring the CPSR register, *after* calling SCHEDOP_block to
 * avoid races with vgic_inject_irq.
 */
void vcpu_block_unless_event_pending(struct vcpu *v)
{
    vcpu_block();
    /* Undo the block if an event needing delivery arrived meanwhile. */
    if ( local_events_need_delivery_nomask() )
        vcpu_unblock(current);
}
1101
vcpu_kick(struct vcpu * vcpu)1102 void vcpu_kick(struct vcpu *vcpu)
1103 {
1104 bool running = vcpu->is_running;
1105
1106 vcpu_unblock(vcpu);
1107 if ( running && vcpu != current )
1108 {
1109 perfc_incr(vcpu_kick);
1110 smp_send_event_check_mask(cpumask_of(vcpu->processor));
1111 }
1112 }
1113
1114 /*
1115 * Local variables:
1116 * mode: C
1117 * c-file-style: "BSD"
1118 * c-basic-offset: 4
1119 * indent-tabs-mode: nil
1120 * End:
1121 */
1122