1 #include <xen/types.h>
2 #include <xen/sched.h>
3 #include "mcaction.h"
4 #include "vmce.h"
5 #include "mce.h"
6
7 static struct mcinfo_recovery *
mci_action_add_pageoffline(int bank,struct mc_info * mi,mfn_t mfn,uint32_t status)8 mci_action_add_pageoffline(int bank, struct mc_info *mi,
9 mfn_t mfn, uint32_t status)
10 {
11 struct mcinfo_recovery *rec;
12
13 if ( !mi )
14 return NULL;
15
16 rec = x86_mcinfo_reserve(mi, sizeof(*rec), MC_TYPE_RECOVERY);
17 if ( !rec )
18 {
19 mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
20 return NULL;
21 }
22
23 rec->mc_bank = bank;
24 rec->action_types = MC_ACTION_PAGE_OFFLINE;
25 rec->action_info.page_retire.mfn = mfn_x(mfn);
26 rec->action_info.page_retire.status = status;
27 return rec;
28 }
29
30 mce_check_addr_t mc_check_addr = NULL;
31
/*
 * Install @cbfunc as the address-check callback (mc_check_addr), replacing
 * whatever was registered before.
 */
void mce_register_addrcheck(mce_check_addr_t cbfunc)
{
    mc_check_addr = cbfunc;
}
36
37 void
mc_memerr_dhandler(struct mca_binfo * binfo,enum mce_result * result,const struct cpu_user_regs * regs)38 mc_memerr_dhandler(struct mca_binfo *binfo,
39 enum mce_result *result,
40 const struct cpu_user_regs *regs)
41 {
42 struct mcinfo_bank *bank = binfo->mib;
43 struct mcinfo_global *global = binfo->mig;
44 struct domain *d;
45 mfn_t mfn;
46 unsigned long gfn;
47 uint32_t status;
48 int vmce_vcpuid;
49 unsigned int mc_vcpuid;
50
51 if ( !mc_check_addr(bank->mc_status, bank->mc_misc, MC_ADDR_PHYSICAL) )
52 {
53 dprintk(XENLOG_WARNING,
54 "No physical address provided for memory error\n");
55 return;
56 }
57
58 mfn = maddr_to_mfn(bank->mc_addr);
59 if ( offline_page(mfn, 1, &status) )
60 {
61 dprintk(XENLOG_WARNING,
62 "Failed to offline page %"PRI_mfn" for MCE error\n",
63 mfn_x(mfn));
64 return;
65 }
66
67 mci_action_add_pageoffline(binfo->bank, binfo->mi, mfn, status);
68
69 /* This is free page */
70 if ( status & PG_OFFLINE_OFFLINED )
71 *result = MCER_RECOVERED;
72 else if ( status & PG_OFFLINE_AGAIN )
73 *result = MCER_CONTINUE;
74 else if ( status & PG_OFFLINE_PENDING )
75 {
76 /* This page has owner */
77 if ( status & PG_OFFLINE_OWNED )
78 {
79 bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT;
80 mce_printk(MCE_QUIET, "MCE: This error page is ownded"
81 " by DOM %d\n", bank->mc_domid);
82 /*
83 * XXX: Cannot handle shared pages yet
84 * (this should identify all domains and gfn mapping to
85 * the mfn in question)
86 */
87 BUG_ON( bank->mc_domid == DOMID_COW );
88 if ( bank->mc_domid != DOMID_XEN )
89 {
90 d = get_domain_by_id(bank->mc_domid);
91 ASSERT(d);
92 gfn = get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT);
93
94 if ( unmmap_broken_page(d, mfn, gfn) )
95 {
96 printk("Unmap broken memory %"PRI_mfn" for DOM%d failed\n",
97 mfn_x(mfn), d->domain_id);
98 goto vmce_failed;
99 }
100
101 mc_vcpuid = global->mc_vcpuid;
102 if ( mc_vcpuid == XEN_MC_VCPUID_INVALID ||
103 /*
104 * Because MC# may happen asynchronously with the actual
105 * operation that triggers the error, the domain ID as
106 * well as the vCPU ID collected in 'global' at MC# are
107 * not always precise. In that case, fallback to broadcast.
108 */
109 global->mc_domid != bank->mc_domid ||
110 (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
111 (!(global->mc_gstatus & MCG_STATUS_LMCE) ||
112 !(d->vcpu[mc_vcpuid]->arch.vmce.mcg_ext_ctl &
113 MCG_EXT_CTL_LMCE_EN))) )
114 vmce_vcpuid = VMCE_INJECT_BROADCAST;
115 else
116 vmce_vcpuid = mc_vcpuid;
117
118 bank->mc_addr = gfn << PAGE_SHIFT |
119 (bank->mc_addr & (PAGE_SIZE - 1));
120 if ( fill_vmsr_data(bank, d, global->mc_gstatus, vmce_vcpuid) )
121 {
122 mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d "
123 "failed\n", bank->mc_domid);
124 goto vmce_failed;
125 }
126
127 /* We will inject vMCE to DOMU */
128 if ( inject_vmce(d, vmce_vcpuid) < 0 )
129 {
130 mce_printk(MCE_QUIET, "inject vMCE to DOM%d"
131 " failed\n", d->domain_id);
132 goto vmce_failed;
133 }
134
135 /*
136 * Impacted domain go on with domain's recovery job
137 * if the domain has its own MCA handler.
138 * For xen, it has contained the error and finished
139 * its own recovery job.
140 */
141 *result = MCER_RECOVERED;
142 put_domain(d);
143
144 return;
145 vmce_failed:
146 put_domain(d);
147 domain_crash(d);
148 }
149 }
150 }
151 }
152