/******************************************************************************
 * crash.c
 *
 * Based heavily on arch/i386/kernel/crash.c from Linux 2.6.16
 *
 * Xen port written by:
 * - Simon 'Horms' Horman <horms@verge.net.au>
 * - Magnus Damm <magnus@valinux.co.jp>
 */

#include <asm/atomic.h>
#include <asm/elf.h>
#include <xen/types.h>
#include <xen/irq.h>
#include <asm/nmi.h>
#include <xen/string.h>
#include <xen/elf.h>
#include <xen/elfcore.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/perfc.h>
#include <xen/kexec.h>
#include <xen/sched.h>
#include <xen/keyhandler.h>
#include <public/xen.h>
#include <asm/shared.h>
#include <asm/hvm/support.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <xen/iommu.h>
#include <asm/hpet.h>
#include <xen/console.h>

static cpumask_t waiting_to_crash;
static unsigned int crashing_cpu;
static DEFINE_PER_CPU_READ_MOSTLY(bool, crash_save_done);

/* This becomes the NMI handler for non-crashing CPUs, when Xen is crashing. */
static void noreturn do_nmi_crash(const struct cpu_user_regs *regs)
{
    unsigned int cpu = smp_processor_id();

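    /*
     * Set EFLAGS.AC (stac) so that SMAP cannot fault any stray access to
     * user mappings on this one-way path.
     */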
    stac();

    /* nmi_shootdown_cpus() should ensure that this assertion is correct. */
    ASSERT(cpu != crashing_cpu);

    /* Save crash information and shut down CPU.  Attempt only once. */
    if ( !this_cpu(crash_save_done) )
    {
        /* Disable the interrupt stack table for the MCE handler.  This
         * prevents race conditions between clearing MCIP and receiving a
         * new MCE, during which the exception frame would be clobbered
         * and the MCE handler fall into an infinite loop.  We are soon
         * going to disable the NMI watchdog, so the loop would not be
         * caught.
         *
         * We do not need to change the NMI IST, as the nmi_crash
         * handler is immune to corrupt exception frames, by virtue of
         * being designed never to return.
         *
         * This update is safe from a security point of view, as this
         * pcpu is never going to try to sysret back to a PV vcpu.
         */
        set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);

        kexec_crash_save_cpu();
        __stop_this_cpu();

        this_cpu(crash_save_done) = true;
        cpumask_clear_cpu(cpu, &waiting_to_crash);
    }

    /* Poor man's self_nmi().  __stop_this_cpu() has reverted the LAPIC
     * back to its boot state, so we are unable to rely on the regular
     * apic_* functions, due to 'x2apic_enabled' being possibly wrong.
     * (The likely scenario is that we have reverted from x2apic mode to
     * xapic, at which point #GPFs will occur if we use the apic_*
     * functions.)
     *
     * The ICR and APIC ID of the LAPIC are still valid even during
     * software disable (Intel SDM Vol 3, 10.4.7.2).  As a result, we
     * can deliberately queue up another NMI at the LAPIC which will not
     * be delivered as the hardware NMI latch is currently in effect.
     * This means that if NMIs become unlatched (e.g. following a
     * non-fatal MCE), the LAPIC will force us back here rather than
     * wandering back into regular Xen code.
     */
    switch ( current_local_apic_mode() )
    {
        u32 apic_id;

    case APIC_MODE_X2APIC:
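        /*
         * In x2APIC mode the ICR is a single 64-bit MSR write, with the
         * destination APIC ID in the upper 32 bits; there is no delivery
         * status bit to poll.
         */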
        apic_id = apic_rdmsr(APIC_ID);

        apic_wrmsr(APIC_ICR, APIC_DM_NMI | APIC_DEST_PHYSICAL
                   | ((u64)apic_id << 32));
        break;

    case APIC_MODE_XAPIC:
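        /*
         * xAPIC (MMIO) mode: wait for any in-flight IPI to complete,
         * program the destination in ICR2, then write ICR to send the NMI.
         */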
        apic_id = GET_xAPIC_ID(apic_mem_read(APIC_ID));

        while ( apic_mem_read(APIC_ICR) & APIC_ICR_BUSY )
            cpu_relax();

        apic_mem_write(APIC_ICR2, apic_id << 24);
        apic_mem_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_PHYSICAL);
        break;

    default:
        break;
    }

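    /*
     * Park this CPU.  If the NMI latch ever clears, the NMI queued above
     * brings us straight back into this handler.
     */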
    for ( ; ; )
        halt();
}

static void nmi_shootdown_cpus(void)
{
    unsigned long msecs;
    unsigned int cpu = smp_processor_id();

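    /*
     * Quiesce the NMI watchdog first so that its periodic NMIs do not
     * interfere with the crash NMIs sent below.
     */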
    disable_lapic_nmi_watchdog();
    local_irq_disable();

    crashing_cpu = cpu;
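    /*
     * We may have crashed from within an interrupt handler; reset the
     * nesting count so the rest of the shutdown path is not treated as
     * running in IRQ context.
     */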
    local_irq_count(crashing_cpu) = 0;

    cpumask_andnot(&waiting_to_crash, &cpu_online_map, cpumask_of(cpu));

    /*
     * Disable IST for MCEs to avoid stack corruption race conditions, and
     * change the NMI handler to a nop to avoid deviation from this codepath.
     */
    _set_gate_lower(&idt_tables[cpu][TRAP_nmi],
                    SYS_DESC_irq_gate, 0, &trap_nop);
    set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);

    /*
     * Ideally would be:
     *   exception_table[TRAP_nmi] = &do_nmi_crash;
     *
     * but the exception_table is read only.  Access it via its directmap
     * mappings.
     */
    write_atomic((unsigned long *)__va(__pa(&exception_table[TRAP_nmi])),
                 (unsigned long)&do_nmi_crash);

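    /*
     * NMI all other online CPUs; they will enter do_nmi_crash() via the
     * exception_table entry patched above.
     */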
    smp_send_nmi_allbutself();

    msecs = 1000; /* Wait at most a second for the other cpus to stop */
    while ( !cpumask_empty(&waiting_to_crash) && msecs )
    {
        mdelay(1);
        msecs--;
    }

    /*
     * We may have NMI'd another CPU while it was holding the console lock.
     * It won't be in a position to release the lock...
     */
    console_force_unlock();

    /* Leave a hint of how well we did trying to shoot down the other cpus */
    if ( cpumask_empty(&waiting_to_crash) )
        printk("Shot down all CPUs\n");
    else
        printk("Failed to shoot down CPUs {%*pbl}\n",
               CPUMASK_PR(&waiting_to_crash));

    /*
     * Shut down the IOMMU on the crash path, as some old crashdump
     * kernels fail to boot if interrupt/DMA remapping is still left
     * enabled.
     */
    iommu_crash_shutdown();

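    /*
     * Skip the remaining teardown if this CPU has already been marked
     * offline; normally it is still online on the crash path.
     */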
    if ( cpu_online(cpu) )
    {
        __stop_this_cpu();

        /*
         * This is a bit of a hack due to the problems with the x2apic_enabled
         * variable, but we can't do any better without a significant
         * refactoring of the APIC code
         */
        x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);

        disable_IO_APIC();
        hpet_disable();
    }
}

void machine_crash_shutdown(void)
{
    crash_xen_info_t *info;

    nmi_shootdown_cpus();

    /* Reset CPUID masking and faulting to the host's default. */
    ctxt_switch_levelling(NULL);

    /* Disable shadow stacks. */
    if ( cpu_has_xen_shstk )
    {
        wrmsrl(MSR_S_CET, 0);
        write_cr4(read_cr4() & ~X86_CR4_CET);
    }

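    /*
     * Record the information the crash environment needs to locate Xen
     * and dom0's pfn-to-mfn frame list.
     */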
    info = kexec_crash_save_info();
    info->xen_phys_start = xen_phys_start;
    info->dom0_pfn_to_mfn_frame_list_list =
        arch_get_pfn_to_mfn_frame_list_list(hardware_domain);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */