1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 2 of the License, or
5  * (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  */
12 #include <xen/bitops.h>
13 #include <xen/errno.h>
14 #include <xen/grant_table.h>
15 #include <xen/hypercall.h>
16 #include <xen/init.h>
17 #include <xen/lib.h>
18 #include <xen/livepatch.h>
19 #include <xen/sched.h>
20 #include <xen/softirq.h>
21 #include <xen/wait.h>
22 
23 #include <asm/alternative.h>
24 #include <asm/cpuerrata.h>
25 #include <asm/cpufeature.h>
26 #include <asm/current.h>
27 #include <asm/event.h>
28 #include <asm/gic.h>
29 #include <asm/guest_access.h>
30 #include <asm/guest_atomics.h>
31 #include <asm/irq.h>
32 #include <asm/p2m.h>
33 #include <asm/platform.h>
34 #include <asm/procinfo.h>
35 #include <asm/regs.h>
36 #include <asm/tee/tee.h>
37 #include <asm/vfp.h>
38 #include <asm/vgic.h>
39 #include <asm/vtimer.h>
40 
41 #include "vuart.h"
42 
/* Per-pCPU pointer to the vCPU whose register state is currently loaded. */
DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
44 
/*
 * Idle this physical CPU: enter RCU idle, drain any softirqs that raised,
 * and wait-for-interrupt if the CPU is still haltable.
 */
static void do_idle(void)
{
    unsigned int cpu = smp_processor_id();

    rcu_idle_enter(cpu);
    /* rcu_idle_enter() can raise TIMER_SOFTIRQ. Process it now. */
    process_pending_softirqs();

    /*
     * Interrupts must stay disabled across the haltable check and wfi()
     * so a wake-up event arriving in between is not lost.
     */
    local_irq_disable();
    if ( cpu_is_haltable(cpu) )
    {
        dsb(sy); /* Complete outstanding memory accesses before halting. */
        wfi();
    }
    local_irq_enable();

    rcu_idle_exit(cpu);
}
63 
/*
 * Main loop of an idle vCPU: run tasklet/livepatch work, scrub free pages,
 * otherwise halt until the next event.  Never returns.
 */
void idle_loop(void)
{
    unsigned int cpu = smp_processor_id();

    for ( ; ; )
    {
        if ( cpu_is_offline(cpu) )
            stop_cpu();

        /* Are we here for running vcpu context tasklets, or for idling? */
        if ( unlikely(tasklet_work_to_do(cpu)) )
        {
            do_tasklet();
            /* Livepatch work is always kicked off via a tasklet. */
            check_for_livepatch_work();
        }
        /*
         * Test softirqs twice --- first to see if should even try scrubbing
         * and then, after it is done, whether softirqs became pending
         * while we were scrubbing.
         */
        else if ( !softirq_pending(cpu) && !scrub_free_pages() &&
                  !softirq_pending(cpu) )
            do_idle();

        do_softirq();
    }
}
92 
/*
 * Save the architectural state of vCPU 'p' into its struct vcpu so another
 * vCPU can be loaded on this pCPU.  Runs with interrupts disabled (see
 * context_switch() / schedule_tail()).
 */
static void ctxt_switch_from(struct vcpu *p)
{
    /* When the idle VCPU is running, Xen will always stay in hypervisor
     * mode. Therefore we don't need to save the context of an idle VCPU.
     */
    if ( is_idle_vcpu(p) )
        return;

    p2m_save_state(p);

    /* CP 15 */
    p->arch.csselr = READ_SYSREG(CSSELR_EL1);

    /* Control Registers */
    p->arch.cpacr = READ_SYSREG(CPACR_EL1);

    p->arch.contextidr = READ_SYSREG(CONTEXTIDR_EL1);
    p->arch.tpidr_el0 = READ_SYSREG(TPIDR_EL0);
    p->arch.tpidrro_el0 = READ_SYSREG(TPIDRRO_EL0);
    p->arch.tpidr_el1 = READ_SYSREG(TPIDR_EL1);

    /* Arch timer */
    p->arch.cntkctl = READ_SYSREG32(CNTKCTL_EL1);
    virt_timer_save(p);

    /* ThumbEE registers only exist for 32-bit guests on CPUs with ThumbEE. */
    if ( is_32bit_domain(p->domain) && cpu_has_thumbee )
    {
        p->arch.teecr = READ_SYSREG32(TEECR32_EL1);
        p->arch.teehbr = READ_SYSREG32(TEEHBR32_EL1);
    }

#ifdef CONFIG_ARM_32
    p->arch.joscr = READ_CP32(JOSCR);
    p->arch.jmcr = READ_CP32(JMCR);
#endif

    isb();

    /* MMU */
    p->arch.vbar = READ_SYSREG(VBAR_EL1);
    p->arch.ttbcr = READ_SYSREG(TCR_EL1);
    p->arch.ttbr0 = READ_SYSREG64(TTBR0_EL1);
    p->arch.ttbr1 = READ_SYSREG64(TTBR1_EL1);
    if ( is_32bit_domain(p->domain) )
        p->arch.dacr = READ_SYSREG(DACR32_EL2);
    p->arch.par = READ_SYSREG64(PAR_EL1);
#if defined(CONFIG_ARM_32)
    p->arch.mair0 = READ_CP32(MAIR0);
    p->arch.mair1 = READ_CP32(MAIR1);
    p->arch.amair0 = READ_CP32(AMAIR0);
    p->arch.amair1 = READ_CP32(AMAIR1);
#else
    p->arch.mair = READ_SYSREG64(MAIR_EL1);
    p->arch.amair = READ_SYSREG64(AMAIR_EL1);
#endif

    /* Fault Status */
#if defined(CONFIG_ARM_32)
    p->arch.dfar = READ_CP32(DFAR);
    p->arch.ifar = READ_CP32(IFAR);
    p->arch.dfsr = READ_CP32(DFSR);
#elif defined(CONFIG_ARM_64)
    p->arch.far = READ_SYSREG64(FAR_EL1);
    p->arch.esr = READ_SYSREG64(ESR_EL1);
#endif

    if ( is_32bit_domain(p->domain) )
        p->arch.ifsr  = READ_SYSREG(IFSR32_EL2);
    p->arch.afsr0 = READ_SYSREG(AFSR0_EL1);
    p->arch.afsr1 = READ_SYSREG(AFSR1_EL1);

    /* XXX MPU */

    /* VFP */
    vfp_save_state(p);

    /* VGIC */
    gic_save_state(p);

    isb();
}
174 
/*
 * Load the architectural state of vCPU 'n' onto this pCPU.  The restore
 * order is significant: see the erratum #852523 and
 * ARM64_WORKAROUND_AT_SPECULATE comments below before reordering anything.
 */
static void ctxt_switch_to(struct vcpu *n)
{
    uint32_t vpidr;

    /* When the idle VCPU is running, Xen will always stay in hypervisor
     * mode. Therefore we don't need to restore the context of an idle VCPU.
     */
    if ( is_idle_vcpu(n) )
        return;

    /* Expose the physical MIDR as the virtual one, plus the vCPU's MPIDR. */
    vpidr = READ_SYSREG32(MIDR_EL1);
    WRITE_SYSREG32(vpidr, VPIDR_EL2);
    WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2);

    /* VGIC */
    gic_restore_state(n);

    /* VFP */
    vfp_restore_state(n);

    /* XXX MPU */

    /* Fault Status */
#if defined(CONFIG_ARM_32)
    WRITE_CP32(n->arch.dfar, DFAR);
    WRITE_CP32(n->arch.ifar, IFAR);
    WRITE_CP32(n->arch.dfsr, DFSR);
#elif defined(CONFIG_ARM_64)
    WRITE_SYSREG64(n->arch.far, FAR_EL1);
    WRITE_SYSREG64(n->arch.esr, ESR_EL1);
#endif

    if ( is_32bit_domain(n->domain) )
        WRITE_SYSREG(n->arch.ifsr, IFSR32_EL2);
    WRITE_SYSREG(n->arch.afsr0, AFSR0_EL1);
    WRITE_SYSREG(n->arch.afsr1, AFSR1_EL1);

    /* MMU */
    WRITE_SYSREG(n->arch.vbar, VBAR_EL1);
    WRITE_SYSREG(n->arch.ttbcr, TCR_EL1);
    WRITE_SYSREG64(n->arch.ttbr0, TTBR0_EL1);
    WRITE_SYSREG64(n->arch.ttbr1, TTBR1_EL1);

    /*
     * Erratum #852523: DACR32_EL2 must be restored before one of the
     * following sysregs: SCTLR_EL1, TCR_EL1, TTBR0_EL1, TTBR1_EL1 or
     * CONTEXTIDR_EL1.
     */
    if ( is_32bit_domain(n->domain) )
        WRITE_SYSREG(n->arch.dacr, DACR32_EL2);
    WRITE_SYSREG64(n->arch.par, PAR_EL1);
#if defined(CONFIG_ARM_32)
    WRITE_CP32(n->arch.mair0, MAIR0);
    WRITE_CP32(n->arch.mair1, MAIR1);
    WRITE_CP32(n->arch.amair0, AMAIR0);
    WRITE_CP32(n->arch.amair1, AMAIR1);
#elif defined(CONFIG_ARM_64)
    WRITE_SYSREG64(n->arch.mair, MAIR_EL1);
    WRITE_SYSREG64(n->arch.amair, AMAIR_EL1);
#endif
    isb();

    /*
     * ARM64_WORKAROUND_AT_SPECULATE: The P2M should be restored after
     * the stage-1 MMU sysregs have been restored.
     */
    p2m_restore_state(n);

    /* Control Registers */
    WRITE_SYSREG(n->arch.cpacr, CPACR_EL1);

    /*
     * This write to sysreg CONTEXTIDR_EL1 ensures we don't hit erratum
     * #852523. I.e DACR32_EL2 is not correctly synchronized.
     */
    WRITE_SYSREG(n->arch.contextidr, CONTEXTIDR_EL1);
    WRITE_SYSREG(n->arch.tpidr_el0, TPIDR_EL0);
    WRITE_SYSREG(n->arch.tpidrro_el0, TPIDRRO_EL0);
    WRITE_SYSREG(n->arch.tpidr_el1, TPIDR_EL1);

    /* ThumbEE registers only exist for 32-bit guests on CPUs with ThumbEE. */
    if ( is_32bit_domain(n->domain) && cpu_has_thumbee )
    {
        WRITE_SYSREG32(n->arch.teecr, TEECR32_EL1);
        WRITE_SYSREG32(n->arch.teehbr, TEEHBR32_EL1);
    }

#ifdef CONFIG_ARM_32
    WRITE_CP32(n->arch.joscr, JOSCR);
    WRITE_CP32(n->arch.jmcr, JMCR);
#endif
    isb();

    /* CP 15 */
    WRITE_SYSREG(n->arch.csselr, CSSELR_EL1);

    isb();

    /* This is could trigger an hardware interrupt from the virtual
     * timer. The interrupt needs to be injected into the guest. */
    WRITE_SYSREG32(n->arch.cntkctl, CNTKCTL_EL1);
    virt_timer_restore(n);
}
277 
/* Update per-VCPU guest runstate shared memory area (if registered). */
static void update_runstate_area(struct vcpu *v)
{
    void __user *guest_handle = NULL;
    struct vcpu_runstate_info runstate;

    if ( guest_handle_is_null(runstate_guest(v)) )
        return;

    /* Work on a snapshot so the guest sees an internally consistent copy. */
    memcpy(&runstate, &v->runstate, sizeof(runstate));

    if ( VM_ASSIST(v->domain, runstate_update_flag) )
    {
        /*
         * Compute a byte pointer to the last byte of the guest's
         * state_entry_time: '+ 1' advances one whole field (uint64_t
         * arithmetic), the void* '--' then steps back one byte.  On
         * little-endian Arm that byte holds XEN_RUNSTATE_UPDATE (bit 63).
         */
        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
        guest_handle--;
        runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
        /* Set the update-in-progress flag before writing the payload. */
        __raw_copy_to_guest(guest_handle,
                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
        smp_wmb();
    }

    __copy_to_guest(runstate_guest(v), &runstate, 1);

    if ( guest_handle )
    {
        /* Payload written: clear the update-in-progress flag. */
        runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
        smp_wmb();
        __raw_copy_to_guest(guest_handle,
                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
    }
}
309 
/*
 * Second half of a context switch: save 'prev', load 'current', re-enable
 * interrupts, then complete scheduler and time bookkeeping.
 */
static void schedule_tail(struct vcpu *prev)
{
    ASSERT(prev != current);

    ctxt_switch_from(prev);

    ctxt_switch_to(current);

    local_irq_enable();

    sched_context_switched(prev, current);

    update_runstate_area(current);

    /* Ensure that the vcpu has an up-to-date time base. */
    update_vcpu_system_time(current);
}
327 
/*
 * First entry point of a freshly created vCPU (arch_vcpu_create() sets
 * saved_context.pc here).  Finish the switch away from 'prev', then jump
 * to the appropriate entry path on a reset stack.
 */
static void continue_new_vcpu(struct vcpu *prev)
{
    current->arch.actlr = READ_SYSREG32(ACTLR_EL1);
    processor_vcpu_initialise(current);

    schedule_tail(prev);

    if ( is_idle_vcpu(current) )
        reset_stack_and_jump(idle_loop);
    else if ( is_32bit_domain(current->domain) )
        /* check_wakeup_from_wait(); */
        reset_stack_and_jump(return_to_new_vcpu32);
    else
        /* check_wakeup_from_wait(); */
        reset_stack_and_jump(return_to_new_vcpu64);
}
344 
/*
 * Switch this pCPU from 'prev' to 'next'.  __context_switch() swaps the
 * stacks; execution resumes in schedule_tail() (or in continue_new_vcpu()
 * for a vCPU that has never run).
 */
void context_switch(struct vcpu *prev, struct vcpu *next)
{
    ASSERT(local_irq_is_enabled());
    ASSERT(prev != next);
    ASSERT(!vcpu_cpu_dirty(next));

    update_runstate_area(prev);

    local_irq_disable();

    set_current(next);

    prev = __context_switch(prev, next);

    schedule_tail(prev);
}
361 
/* Arch hook invoked when the scheduler keeps the same vCPU running. */
void continue_running(struct vcpu *same)
{
    /* Nothing to do */
}
366 
/* Flush lazily-switched state on this pCPU; Arm never switches lazily. */
void sync_local_execstate(void)
{
    /* Nothing to do -- no lazy switching */
}
371 
/* Make the saved state of vCPU 'v' observable on this pCPU. */
void sync_vcpu_execstate(struct vcpu *v)
{
    /*
     * We don't support lazy switching.
     *
     * However the context may have been saved from a remote pCPU so we
     * need a barrier to ensure it is observed before continuing.
     *
     * Per vcpu_context_saved(), the context can be observed when
     * v->is_running is false (the caller should check it before calling
     * this function).
     *
     * Note this is a full barrier to also prevent update of the context
     * to happen before it was observed.
     */
    smp_mb();
}
389 
/*
 * Consume the next hypercall-continuation format character from *fmt
 * (post-incrementing fmt) and fetch the matching vararg:
 *   'i' = unsigned int, 'l' = unsigned long, 'h' = guest handle.
 * Any other character jumps to the enclosing function's bad_fmt label,
 * with fmt already advanced past the offending character.
 */
#define NEXT_ARG(fmt, args)                                                 \
({                                                                          \
    unsigned long __arg;                                                    \
    switch ( *(fmt)++ )                                                     \
    {                                                                       \
    case 'i': __arg = (unsigned long)va_arg(args, unsigned int);  break;    \
    case 'l': __arg = (unsigned long)va_arg(args, unsigned long); break;    \
    case 'h': __arg = (unsigned long)va_arg(args, void *);        break;    \
    default:  goto bad_fmt;                                                 \
    }                                                                       \
    __arg;                                                                  \
})
402 
hypercall_create_continuation(unsigned int op,const char * format,...)403 unsigned long hypercall_create_continuation(
404     unsigned int op, const char *format, ...)
405 {
406     struct mc_state *mcs = &current->mc_state;
407     struct cpu_user_regs *regs;
408     const char *p = format;
409     unsigned long arg, rc;
410     unsigned int i;
411     va_list args;
412 
413     current->hcall_preempted = true;
414 
415     va_start(args, format);
416 
417     if ( mcs->flags & MCSF_in_multicall )
418     {
419         for ( i = 0; *p != '\0'; i++ )
420             mcs->call.args[i] = NEXT_ARG(p, args);
421 
422         /* Return value gets written back to mcs->call.result */
423         rc = mcs->call.result;
424     }
425     else
426     {
427         regs = guest_cpu_user_regs();
428 
429 #ifdef CONFIG_ARM_64
430         if ( !is_32bit_domain(current->domain) )
431         {
432             regs->x16 = op;
433 
434             for ( i = 0; *p != '\0'; i++ )
435             {
436                 arg = NEXT_ARG(p, args);
437 
438                 switch ( i )
439                 {
440                 case 0: regs->x0 = arg; break;
441                 case 1: regs->x1 = arg; break;
442                 case 2: regs->x2 = arg; break;
443                 case 3: regs->x3 = arg; break;
444                 case 4: regs->x4 = arg; break;
445                 case 5: regs->x5 = arg; break;
446                 }
447             }
448 
449             /* Return value gets written back to x0 */
450             rc = regs->x0;
451         }
452         else
453 #endif
454         {
455             regs->r12 = op;
456 
457             for ( i = 0; *p != '\0'; i++ )
458             {
459                 arg = NEXT_ARG(p, args);
460 
461                 switch ( i )
462                 {
463                 case 0: regs->r0 = arg; break;
464                 case 1: regs->r1 = arg; break;
465                 case 2: regs->r2 = arg; break;
466                 case 3: regs->r3 = arg; break;
467                 case 4: regs->r4 = arg; break;
468                 case 5: regs->r5 = arg; break;
469                 }
470             }
471 
472             /* Return value gets written back to r0 */
473             rc = regs->r0;
474         }
475     }
476 
477     va_end(args);
478 
479     return rc;
480 
481  bad_fmt:
482     va_end(args);
483     gprintk(XENLOG_ERR, "Bad hypercall continuation format '%c'\n", *p);
484     ASSERT_UNREACHABLE();
485     domain_crash(current->domain);
486     return 0;
487 }
488 
489 #undef NEXT_ARG
490 
/*
 * Enter the idle loop on this pCPU, discarding the current stack frame.
 * Never returns.
 */
void startup_cpu_idle_loop(void)
{
    struct vcpu *v = current;

    ASSERT(is_idle_vcpu(v));
    /* TODO
       cpumask_set_cpu(v->processor, v->domain->dirty_cpumask);
       v->dirty_cpu = v->processor;
    */

    reset_stack_and_jump(idle_loop);
}
503 
alloc_domain_struct(void)504 struct domain *alloc_domain_struct(void)
505 {
506     struct domain *d;
507     BUILD_BUG_ON(sizeof(*d) > PAGE_SIZE);
508     d = alloc_xenheap_pages(0, 0);
509     if ( d == NULL )
510         return NULL;
511 
512     clear_page(d);
513     return d;
514 }
515 
/* Free a domain structure allocated by alloc_domain_struct(). */
void free_domain_struct(struct domain *d)
{
    free_xenheap_page(d);
}
520 
/* No per-domain page-frame information is tracked on Arm; nothing to dump. */
void dump_pageframe_info(struct domain *d)
{

}
525 
526 /*
527  * The new VGIC has a bigger per-IRQ structure, so we need more than one
528  * page on ARM64. Cowardly increase the limit in this case.
529  */
530 #if defined(CONFIG_NEW_VGIC) && defined(CONFIG_ARM_64)
531 #define MAX_PAGES_PER_VCPU  2
532 #else
533 #define MAX_PAGES_PER_VCPU  1
534 #endif
535 
alloc_vcpu_struct(const struct domain * d)536 struct vcpu *alloc_vcpu_struct(const struct domain *d)
537 {
538     struct vcpu *v;
539 
540     BUILD_BUG_ON(sizeof(*v) > MAX_PAGES_PER_VCPU * PAGE_SIZE);
541     v = alloc_xenheap_pages(get_order_from_bytes(sizeof(*v)), 0);
542     if ( v != NULL )
543     {
544         unsigned int i;
545 
546         for ( i = 0; i < DIV_ROUND_UP(sizeof(*v), PAGE_SIZE); i++ )
547             clear_page((void *)v + i * PAGE_SIZE);
548     }
549 
550     return v;
551 }
552 
/* Free a vCPU structure allocated by alloc_vcpu_struct(). */
void free_vcpu_struct(struct vcpu *v)
{
    free_xenheap_pages(v, get_order_from_bytes(sizeof(*v)));
}
557 
/*
 * Arch-specific vCPU construction: allocate the hypervisor stack, seed
 * the saved context so the first context switch enters
 * continue_new_vcpu(), then (for non-idle vCPUs) initialise guest-visible
 * registers, the vGIC and the vtimer.  Returns 0 or -errno; on failure
 * partially initialised state is torn down via arch_vcpu_destroy().
 */
int arch_vcpu_create(struct vcpu *v)
{
    int rc = 0;

    BUILD_BUG_ON( sizeof(struct cpu_info) > STACK_SIZE );

    v->arch.stack = alloc_xenheap_pages(STACK_ORDER, MEMF_node(vcpu_to_node(v)));
    if ( v->arch.stack == NULL )
        return -ENOMEM;

    /* cpu_info lives at the top of the stack. */
    v->arch.cpu_info = (struct cpu_info *)(v->arch.stack
                                           + STACK_SIZE
                                           - sizeof(struct cpu_info));
    memset(v->arch.cpu_info, 0, sizeof(*v->arch.cpu_info));

    /* First switch to this vCPU will start executing continue_new_vcpu(). */
    v->arch.saved_context.sp = (register_t)v->arch.cpu_info;
    v->arch.saved_context.pc = (register_t)continue_new_vcpu;

    /* Idle VCPUs don't need the rest of this setup */
    if ( is_idle_vcpu(v) )
        return rc;

    v->arch.sctlr = SCTLR_GUEST_INIT;

    v->arch.vmpidr = MPIDR_SMP | vcpuid_to_vaffinity(v->vcpu_id);

    v->arch.hcr_el2 = get_default_hcr_flags();

    if ( (rc = vcpu_vgic_init(v)) != 0 )
        goto fail;

    if ( (rc = vcpu_vtimer_init(v)) != 0 )
        goto fail;

    /*
     * The workaround 2 (i.e SSBD mitigation) is enabled by default if
     * supported.
     */
    if ( get_ssbd_state() == ARM_SSBD_RUNTIME )
        v->arch.cpu_info->flags |= CPUINFO_WORKAROUND_2_FLAG;

    return rc;

fail:
    arch_vcpu_destroy(v);
    return rc;
}
605 
/* Release the per-vCPU resources allocated by arch_vcpu_create(). */
void arch_vcpu_destroy(struct vcpu *v)
{
    vcpu_timer_destroy(v);
    vcpu_vgic_free(v);
    free_xenheap_pages(v->arch.stack, STACK_ORDER);
}
612 
/* Mark the vCPU as executing in AArch64 state (set HCR_EL2.RW). */
void vcpu_switch_to_aarch64_mode(struct vcpu *v)
{
    v->arch.hcr_el2 |= HCR_RW;
}
617 
arch_sanitise_domain_config(struct xen_domctl_createdomain * config)618 int arch_sanitise_domain_config(struct xen_domctl_createdomain *config)
619 {
620     unsigned int max_vcpus;
621 
622     /* HVM and HAP must be set. IOMMU may or may not be */
623     if ( (config->flags & ~XEN_DOMCTL_CDF_iommu) !=
624          (XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap) )
625     {
626         dprintk(XENLOG_INFO, "Unsupported configuration %#x\n",
627                 config->flags);
628         return -EINVAL;
629     }
630 
631     /* The P2M table must always be shared between the CPU and the IOMMU */
632     if ( config->iommu_opts & XEN_DOMCTL_IOMMU_no_sharept )
633     {
634         dprintk(XENLOG_INFO,
635                 "Unsupported iommu option: XEN_DOMCTL_IOMMU_no_sharept\n");
636         return -EINVAL;
637     }
638 
639     /* Fill in the native GIC version, passed back to the toolstack. */
640     if ( config->arch.gic_version == XEN_DOMCTL_CONFIG_GIC_NATIVE )
641     {
642         switch ( gic_hw_version() )
643         {
644         case GIC_V2:
645             config->arch.gic_version = XEN_DOMCTL_CONFIG_GIC_V2;
646             break;
647 
648         case GIC_V3:
649             config->arch.gic_version = XEN_DOMCTL_CONFIG_GIC_V3;
650             break;
651 
652         default:
653             ASSERT_UNREACHABLE();
654             return -EINVAL;
655         }
656     }
657 
658     /* max_vcpus depends on the GIC version, and Xen's compiled limit. */
659     max_vcpus = min(vgic_max_vcpus(config->arch.gic_version), MAX_VIRT_CPUS);
660 
661     if ( max_vcpus == 0 )
662     {
663         dprintk(XENLOG_INFO, "Unsupported GIC version\n");
664         return -EINVAL;
665     }
666 
667     if ( config->max_vcpus > max_vcpus )
668     {
669         dprintk(XENLOG_INFO, "Requested vCPUs (%u) exceeds max (%u)\n",
670                 config->max_vcpus, max_vcpus);
671         return -EINVAL;
672     }
673 
674     if ( config->arch.tee_type != XEN_DOMCTL_CONFIG_TEE_NONE &&
675          config->arch.tee_type != tee_get_type() )
676     {
677         dprintk(XENLOG_INFO, "Unsupported TEE type\n");
678         return -EINVAL;
679     }
680 
681     return 0;
682 }
683 
/*
 * Arch-specific domain construction: IOMMU, P2M, shared-info page, vGIC,
 * I/O handlers, vtimer, TEE mediator and (hardware domain only) the
 * virtual UART.  On any failure the partially built domain is destroyed
 * and the error returned.
 */
int arch_domain_create(struct domain *d,
                       struct xen_domctl_createdomain *config)
{
    int rc, count = 0;

    BUILD_BUG_ON(GUEST_MAX_VCPUS < MAX_VIRT_CPUS);

    /* Idle domains do not need this setup */
    if ( is_idle_domain(d) )
        return 0;

    ASSERT(config != NULL);

    /* p2m_init relies on some value initialized by the IOMMU subsystem */
    if ( (rc = iommu_domain_init(d, config->iommu_opts)) != 0 )
        goto fail;

    if ( (rc = p2m_init(d)) != 0 )
        goto fail;

    rc = -ENOMEM;
    if ( (d->shared_info = alloc_xenheap_pages(0, 0)) == NULL )
        goto fail;

    clear_page(d->shared_info);
    share_xen_page_with_guest(virt_to_page(d->shared_info), d, SHARE_rw);

    /* config has been sanitised: only GIC v2/v3 can reach here. */
    switch ( config->arch.gic_version )
    {
    case XEN_DOMCTL_CONFIG_GIC_V2:
        d->arch.vgic.version = GIC_V2;
        break;

    case XEN_DOMCTL_CONFIG_GIC_V3:
        d->arch.vgic.version = GIC_V3;
        break;

    default:
        BUG();
    }

    if ( (rc = domain_vgic_register(d, &count)) != 0 )
        goto fail;

    if ( (rc = domain_io_init(d, count + MAX_IO_HANDLER)) != 0 )
        goto fail;

    if ( (rc = domain_vgic_init(d, config->arch.nr_spis)) != 0 )
        goto fail;

    if ( (rc = domain_vtimer_init(d, &config->arch)) != 0 )
        goto fail;

    if ( (rc = tee_domain_init(d, config->arch.tee_type)) != 0 )
        goto fail;

    update_domain_wallclock_time(d);

    /*
     * The hardware domain will get a PPI later in
     * arch/arm/domain_build.c  depending on the
     * interrupt map of the hardware.
     */
    if ( !is_hardware_domain(d) )
    {
        d->arch.evtchn_irq = GUEST_EVTCHN_PPI;
        /* At this stage vgic_reserve_virq should never fail */
        if ( !vgic_reserve_virq(d, GUEST_EVTCHN_PPI) )
            BUG();
    }

    /*
     * Virtual UART is only used by linux early printk and decompress code.
     * Only use it for the hardware domain because the linux kernel may not
     * support multi-platform.
     */
    if ( is_hardware_domain(d) && (rc = domain_vuart_init(d)) )
        goto fail;

    return 0;

fail:
    d->is_dying = DOMDYING_dead;
    arch_domain_destroy(d);

    return rc;
}
771 
/* Tear down the arch-specific state created by arch_domain_create(). */
void arch_domain_destroy(struct domain *d)
{
    /* IOMMU page table is shared with P2M, always call
     * iommu_domain_destroy() before p2m_teardown().
     */
    iommu_domain_destroy(d);
    p2m_teardown(d);
    domain_vgic_free(d);
    domain_vuart_free(d);
    free_xenheap_page(d->shared_info);
#ifdef CONFIG_ACPI
    free_xenheap_pages(d->arch.efi_acpi_table,
                       get_order_from_bytes(d->arch.efi_acpi_len));
#endif
    domain_io_free(d);
}
788 
/* No arch-specific work is needed on Arm when a domain shuts down... */
void arch_domain_shutdown(struct domain *d)
{
}

/* ... nor when it is paused... */
void arch_domain_pause(struct domain *d)
{
}

/* ... nor when it is unpaused. */
void arch_domain_unpause(struct domain *d)
{
}
800 
/* Soft reset is not implemented on Arm. */
int arch_domain_soft_reset(struct domain *d)
{
    return -ENOSYS;
}
805 
/* Final arch hook once the toolstack has finished building the domain. */
void arch_domain_creation_finished(struct domain *d)
{
    /*
     * To avoid flushing the whole guest RAM on the first Set/Way, we
     * invalidate the P2M to track what has been accessed.
     *
     * This is only done when the IOMMU is not used or the page-tables are
     * not shared, because an entry with the valid bit (bit[0]) clear would
     * result in an IOMMU fault that could not be fixed up.
     */
    if ( !iommu_use_hap_pt(d) )
        p2m_invalidate_root(p2m_get_hostp2m(d));
}
819 
/*
 * Check whether a 32-bit guest PSR selects a processor mode a guest may
 * legitimately run in.  Returns 1 if valid, 0 otherwise.
 */
static int is_guest_pv32_psr(uint32_t psr)
{
    switch ( psr & PSR_MODE_MASK )
    {
    case PSR_MODE_USR:
    case PSR_MODE_FIQ:
    case PSR_MODE_IRQ:
    case PSR_MODE_SVC:
    case PSR_MODE_ABT:
    case PSR_MODE_UND:
    case PSR_MODE_SYS:
        return 1;
    default:
        /* Monitor and Hyp modes (and anything else) are not guest modes. */
        return 0;
    }
}
838 
839 
840 #ifdef CONFIG_ARM_64
/*
 * Check whether a 64-bit guest PSR selects an exception level a guest may
 * legitimately run in.  Returns 1 if valid, 0 otherwise.
 */
static int is_guest_pv64_psr(uint32_t psr)
{
    /* AArch32 state (PSR_MODE_BIT set) is never valid for a 64-bit guest. */
    if ( psr & PSR_MODE_BIT )
        return 0;

    switch ( psr & PSR_MODE_MASK )
    {
    case PSR_MODE_EL1h:
    case PSR_MODE_EL1t:
    case PSR_MODE_EL0t:
        return 1;
    default:
        /* EL2/EL3 modes (and anything else) are not guest modes. */
        return 0;
    }
}
860 #endif
861 
862 /*
863  * Initialise VCPU state. The context can be supplied by either the
864  * toolstack (XEN_DOMCTL_setvcpucontext) or the guest
865  * (VCPUOP_initialise) and therefore must be properly validated.
866  */
int arch_set_info_guest(
    struct vcpu *v, vcpu_guest_context_u c)
{
    struct vcpu_guest_context *ctxt = c.nat;
    struct vcpu_guest_core_regs *regs = &c.nat->user_regs;

    if ( is_32bit_domain(v->domain) )
    {
        /* Banked SPSRs; a value of zero means "not supplied". */
        const uint32_t banked_spsrs[] = {
            regs->spsr_svc, regs->spsr_abt, regs->spsr_und,
            regs->spsr_irq, regs->spsr_fiq,
        };
        unsigned int i;

        if ( !is_guest_pv32_psr(regs->cpsr) )
            return -EINVAL;

        for ( i = 0; i < ARRAY_SIZE(banked_spsrs); i++ )
        {
            if ( banked_spsrs[i] && !is_guest_pv32_psr(banked_spsrs[i]) )
                return -EINVAL;
        }
    }
#ifdef CONFIG_ARM_64
    else
    {
        if ( !is_guest_pv64_psr(regs->cpsr) )
            return -EINVAL;

        if ( regs->spsr_el1 && !is_guest_pv64_psr(regs->spsr_el1) )
            return -EINVAL;
    }
#endif

    vcpu_regs_user_to_hyp(v, regs);

    /* Stage-1 translation state, loaded on the next context switch. */
    v->arch.sctlr = ctxt->sctlr;
    v->arch.ttbr0 = ctxt->ttbr0;
    v->arch.ttbr1 = ctxt->ttbr1;
    v->arch.ttbcr = ctxt->ttbcr;

    v->is_initialised = 1;

    if ( ctxt->flags & VGCF_online )
        clear_bit(_VPF_down, &v->pause_flags);
    else
        set_bit(_VPF_down, &v->pause_flags);

    return 0;
}
916 
/* VCPUOP_initialise: no arch-specific handling beyond the common path. */
int arch_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    return default_initialise_vcpu(v, arg);
}
921 
/* Arch hook for vCPU reset; only ends any pending shutdown deferral. */
int arch_vcpu_reset(struct vcpu *v)
{
    vcpu_end_shutdown_deferral(v);
    return 0;
}
927 
/*
 * Drop the allocation reference on every page in 'list'.  Returns
 * -ERESTART when a hypercall preemption is pending, in which case the
 * caller arranges a continuation and we resume on the remaining pages.
 */
static int relinquish_memory(struct domain *d, struct page_list_head *list)
{
    struct page_info *page, *tmp;
    int               ret = 0;

    /* Use a recursive lock, as we may enter 'free_domheap_page'. */
    spin_lock_recursive(&d->page_alloc_lock);

    page_list_for_each_safe( page, tmp, list )
    {
        /* Grab a reference to the page so it won't disappear from under us. */
        if ( unlikely(!get_page(page, d)) )
            /*
             * Couldn't get a reference -- someone is freeing this page and
             * has already committed to doing so, so no more to do here.
             *
             * Note that the page must be left on the list, a list_del
             * here will clash with the list_del done by the other
             * party in the race and corrupt the list head.
             */
            continue;

        put_page_alloc_ref(page);
        put_page(page);

        if ( hypercall_preempt_check() )
        {
            ret = -ERESTART;
            goto out;
        }
    }

  out:
    spin_unlock_recursive(&d->page_alloc_lock);
    return ret;
}
964 
/*
 * Record the current progress. Subsequent hypercall continuations will
 * logically restart work from this point.
 *
 * PROGRESS() markers must not be in the middle of loops. The loop
 * variable isn't preserved across a continuation.
 *
 * To avoid redundant work, there should be a marker before each
 * function which may return -ERESTART.
 */
enum {
    PROG_tee = 1,
    PROG_xen,
    PROG_page,
    PROG_mapping,
    PROG_done,
};

#define PROGRESS(x)                         \
    d->arch.rel_priv = PROG_ ## x;          \
    /* Fallthrough */                       \
    case PROG_ ## x
987 
domain_relinquish_resources(struct domain * d)988 int domain_relinquish_resources(struct domain *d)
989 {
990     int ret = 0;
991 
992     /*
993      * This hypercall can take minutes of wallclock time to complete.  This
994      * logic implements a co-routine, stashing state in struct domain across
995      * hypercall continuation boundaries.
996      */
997     switch ( d->arch.rel_priv )
998     {
999     case 0:
1000         ret = iommu_release_dt_devices(d);
1001         if ( ret )
1002             return ret;
1003 
1004         /*
1005          * Release the resources allocated for vpl011 which were
1006          * allocated via a DOMCTL call XEN_DOMCTL_vuart_op.
1007          */
1008         domain_vpl011_deinit(d);
1009 
1010     PROGRESS(tee):
1011         ret = tee_relinquish_resources(d);
1012         if (ret )
1013             return ret;
1014 
1015     PROGRESS(xen):
1016         ret = relinquish_memory(d, &d->xenpage_list);
1017         if ( ret )
1018             return ret;
1019 
1020     PROGRESS(page):
1021         ret = relinquish_memory(d, &d->page_list);
1022         if ( ret )
1023             return ret;
1024 
1025     PROGRESS(mapping):
1026         ret = relinquish_p2m_mapping(d);
1027         if ( ret )
1028             return ret;
1029 
1030     PROGRESS(done):
1031         break;
1032 
1033     default:
1034         BUG();
1035     }
1036 
1037     return 0;
1038 }
1039 
1040 #undef PROGRESS
1041 
/* Arch-specific part of the 'dump domain info' keyhandler: P2M details. */
void arch_dump_domain_info(struct domain *d)
{
    p2m_dump_info(d);
}
1046 
1047 
/*
 * VCPUOP_* hypercalls reachable by Arm guests.  Only vcpu_info and
 * runstate-area registration are supported; everything else is rejected.
 */
long do_arm_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    switch ( cmd )
    {
    case VCPUOP_register_vcpu_info:
    case VCPUOP_register_runstate_memory_area:
        return do_vcpu_op(cmd, vcpuid, arg);
    default:
        return -EINVAL;
    }
}
1059 
/* No additional arch-specific vCPU ops are implemented on Arm. */
long arch_do_vcpu_op(int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    return -ENOSYS;
}
1064 
/* Arch-specific part of the 'dump vCPU info' keyhandler: GIC/vGIC state. */
void arch_dump_vcpu_info(struct vcpu *v)
{
    gic_dump_info(v);
    gic_dump_vgic_info(v);
}
1070 
vcpu_mark_events_pending(struct vcpu * v)1071 void vcpu_mark_events_pending(struct vcpu *v)
1072 {
1073     bool already_pending = guest_test_and_set_bit(v->domain,
1074         0, (unsigned long *)&vcpu_info(v, evtchn_upcall_pending));
1075 
1076     if ( already_pending )
1077         return;
1078 
1079     vgic_inject_irq(v->domain, v, v->domain->arch.evtchn_irq, true);
1080 }
1081 
vcpu_update_evtchn_irq(struct vcpu * v)1082 void vcpu_update_evtchn_irq(struct vcpu *v)
1083 {
1084     bool pending = vcpu_info(v, evtchn_upcall_pending);
1085 
1086     vgic_inject_irq(v->domain, v, v->domain->arch.evtchn_irq, pending);
1087 }
1088 
/* The ARM spec declares that even if local irqs are masked in
 * the CPSR register, an irq should wake up a cpu from WFI anyway.
 * For this reason we need to check for irqs that need delivery,
 * ignoring the CPSR register, *after* calling SCHEDOP_block to
 * avoid races with vgic_inject_irq.
 */
void vcpu_block_unless_event_pending(struct vcpu *v)
{
    /*
     * NOTE(review): 'v' is not otherwise used -- vcpu_block() and the
     * unblock below both act on 'current', so callers are expected to
     * pass v == current.
     */
    vcpu_block();
    if ( local_events_need_delivery_nomask() )
        vcpu_unblock(current);
}
1101 
vcpu_kick(struct vcpu * vcpu)1102 void vcpu_kick(struct vcpu *vcpu)
1103 {
1104     bool running = vcpu->is_running;
1105 
1106     vcpu_unblock(vcpu);
1107     if ( running && vcpu != current )
1108     {
1109         perfc_incr(vcpu_kick);
1110         smp_send_event_check_mask(cpumask_of(vcpu->processor));
1111     }
1112 }
1113 
1114 /*
1115  * Local variables:
1116  * mode: C
1117  * c-file-style: "BSD"
1118  * c-basic-offset: 4
1119  * indent-tabs-mode: nil
1120  * End:
1121  */
1122