/******************************************************************************
 * crash.c
 *
 * Based heavily on arch/i386/kernel/crash.c from Linux 2.6.16
 *
 * Xen port written by:
 * - Simon 'Horms' Horman <horms@verge.net.au>
 * - Magnus Damm <magnus@valinux.co.jp>
 */

#include <asm/atomic.h>
#include <asm/elf.h>
#include <xen/types.h>
#include <xen/irq.h>
#include <asm/nmi.h>
#include <xen/string.h>
#include <xen/elf.h>
#include <xen/elfcore.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/perfc.h>
#include <xen/kexec.h>
#include <xen/sched.h>
#include <xen/keyhandler.h>
#include <public/xen.h>
#include <asm/shared.h>
#include <asm/hvm/support.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <xen/iommu.h>
#include <asm/hpet.h>
#include <xen/console.h>

static cpumask_t waiting_to_crash;
static unsigned int crashing_cpu;
static DEFINE_PER_CPU_READ_MOSTLY(bool, crash_save_done);

/* This becomes the NMI handler for non-crashing CPUs, when Xen is crashing. */
static void noreturn do_nmi_crash(const struct cpu_user_regs *regs)
{
    unsigned int cpu = smp_processor_id();

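    /* Set EFLAGS.AC: with SMAP active, this keeps any incidental accesses
     * to user-accessible mappings on this one-way path from faulting. */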
    stac();

    /* nmi_shootdown_cpus() should ensure that this assertion is correct. */
    ASSERT(cpu != crashing_cpu);

    /* Save crash information and shut down CPU. Attempt only once. */
    if ( !this_cpu(crash_save_done) )
    {
        /* Disable the interrupt stack table for the MCE handler. This
         * prevents race conditions between clearing MCIP and receiving a
         * new MCE, during which the exception frame would be clobbered
         * and the MCE handler would fall into an infinite loop. We are
         * soon going to disable the NMI watchdog, so the loop would not
         * be caught.
         *
         * We do not need to change the NMI IST, as the nmi_crash
         * handler is immune to corrupt exception frames, by virtue of
         * being designed never to return.
         *
         * This update is safe from a security point of view, as this
         * pcpu is never going to try to sysret back to a PV vcpu.
         */
        set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);

        kexec_crash_save_cpu();
        __stop_this_cpu();

        this_cpu(crash_save_done) = true;
        cpumask_clear_cpu(cpu, &waiting_to_crash);
    }

    /* Poor man's self_nmi(). __stop_this_cpu() has reverted the LAPIC
     * back to its boot state, so we are unable to rely on the regular
     * apic_* functions, due to 'x2apic_enabled' being possibly wrong.
     * (The likely scenario is that we have reverted from x2apic mode to
     * xapic, at which point #GPFs will occur if we use the apic_*
     * functions.)
     *
     * The ICR and APIC ID of the LAPIC are still valid even during
     * software disable (Intel SDM Vol 3, 10.4.7.2). As a result, we
     * can deliberately queue up another NMI at the LAPIC which will not
     * be delivered as the hardware NMI latch is currently in effect.
     * This means that if NMIs become unlatched (e.g. following a
     * non-fatal MCE), the LAPIC will force us back here rather than
     * wandering back into regular Xen code.
     */
    switch ( current_local_apic_mode() )
    {
        u32 apic_id;

    case APIC_MODE_X2APIC:
        apic_id = apic_rdmsr(APIC_ID);

        apic_wrmsr(APIC_ICR, APIC_DM_NMI | APIC_DEST_PHYSICAL
                   | ((u64)apic_id << 32));
        break;

    case APIC_MODE_XAPIC:
        apic_id = GET_xAPIC_ID(apic_mem_read(APIC_ID));

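        /* Wait for any IPI that is still being dispatched before
         * programming a new command into the ICR. */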
        while ( apic_mem_read(APIC_ICR) & APIC_ICR_BUSY )
            cpu_relax();

        apic_mem_write(APIC_ICR2, apic_id << 24);
        apic_mem_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_PHYSICAL);
        break;

    default:
        break;
    }

    for ( ; ; )
        halt();
}

static void nmi_shootdown_cpus(void)
{
    unsigned long msecs;
    unsigned int cpu = smp_processor_id();

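    /* Stop the LAPIC NMI watchdog raising NMIs of its own, and mask
     * interrupts, before the NMI vector is repurposed below. */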
    disable_lapic_nmi_watchdog();
    local_irq_disable();

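    /* Nominate this CPU as the one driving the crash, and zero its
     * interrupt nesting count: we may have arrived here from deep within
     * an interrupt handler. */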
    crashing_cpu = cpu;
    local_irq_count(crashing_cpu) = 0;

    cpumask_andnot(&waiting_to_crash, &cpu_online_map, cpumask_of(cpu));

    /*
     * Disable IST for MCEs to avoid stack corruption race conditions, and
     * change the NMI handler to a nop to avoid deviation from this codepath.
     */
    _set_gate_lower(&idt_tables[cpu][TRAP_nmi],
                    SYS_DESC_irq_gate, 0, &trap_nop);
    set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);

    /*
     * Ideally would be:
     *    exception_table[TRAP_nmi] = &do_nmi_crash;
     *
     * but the exception_table is read only. Access it via its directmap
     * mappings.
     */
    write_atomic((unsigned long *)__va(__pa(&exception_table[TRAP_nmi])),
                 (unsigned long)&do_nmi_crash);

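    /* NMI every other CPU; they will enter do_nmi_crash() above. */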
    smp_send_nmi_allbutself();

    msecs = 1000; /* Wait at most a second for the other cpus to stop */
    while ( !cpumask_empty(&waiting_to_crash) && msecs )
    {
        mdelay(1);
        msecs--;
    }

    /*
     * We may have NMI'd another CPU while it was holding the console lock.
     * It won't be in a position to release the lock...
     */
    console_force_unlock();

    /* Leave a hint of how well we did trying to shoot down the other cpus */
    if ( cpumask_empty(&waiting_to_crash) )
        printk("Shot down all CPUs\n");
    else
        printk("Failed to shoot down CPUs {%*pbl}\n",
               CPUMASK_PR(&waiting_to_crash));

    /*
     * Try to crash shutdown IOMMU functionality as some old crashdump
     * kernels are not happy when booting if interrupt/dma remapping
     * is still enabled.
     */
    iommu_crash_shutdown();

    if ( cpu_online(cpu) )
    {
        __stop_this_cpu();

        /*
         * This is a bit of a hack due to the problems with the x2apic_enabled
         * variable, but we can't do any better without a significant
         * refactoring of the APIC code
         */
        x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);

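        /* Put the IO-APIC and HPET back into a quiescent state before
         * handing over to any crash kernel. */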
        disable_IO_APIC();
        hpet_disable();
    }
}

void machine_crash_shutdown(void)
{
    crash_xen_info_t *info;

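    /* Bring every other CPU to a halt and quiesce the IOMMU, IO-APIC and
     * HPET before the crash information below is finalised. */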
    nmi_shootdown_cpus();

    /* Reset CPUID masking and faulting to the host's default. */
    ctxt_switch_levelling(NULL);

    /* Disable shadow stacks. */
    if ( cpu_has_xen_shstk )
    {
        wrmsrl(MSR_S_CET, 0);
        write_cr4(read_cr4() & ~X86_CR4_CET);
    }

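    /* Record Xen's physical load address and dom0's pfn-to-mfn frame list
     * in the crash info block consumed by crash analysis tools. */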
    info = kexec_crash_save_info();
    info->xen_phys_start = xen_phys_start;
    info->dom0_pfn_to_mfn_frame_list_list =
        arch_get_pfn_to_mfn_frame_list_list(hardware_domain);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */