1 /* drivers/acpi/sleep/power.c - PM core functionality for Xen
2  *
3  * Copyrights from Linux side:
4  * Copyright (c) 2000-2003 Patrick Mochel
5  * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz>
6  * Copyright (c) 2003 Open Source Development Lab
7  * Copyright (c) 2004 David Shaohua Li <shaohua.li@intel.com>
8  * Copyright (c) 2005 Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>
9  *
10  * Slimmed with Xen specific support.
11  */
12 
13 #include <asm/io.h>
14 #include <xen/acpi.h>
15 #include <xen/errno.h>
16 #include <xen/iocap.h>
17 #include <xen/param.h>
18 #include <xen/sched.h>
19 #include <asm/acpi.h>
20 #include <asm/irq.h>
21 #include <asm/init.h>
22 #include <xen/spinlock.h>
23 #include <xen/sched.h>
24 #include <xen/domain.h>
25 #include <xen/console.h>
26 #include <xen/iommu.h>
27 #include <xen/watchdog.h>
28 #include <xen/cpu.h>
29 #include <public/platform.h>
30 #include <asm/tboot.h>
31 #include <asm/apic.h>
32 #include <asm/io_apic.h>
33 #include <asm/microcode.h>
34 #include <asm/spec_ctrl.h>
35 #include <acpi/cpufreq/cpufreq.h>
36 
/*
 * Starts at 1 and is incremented by enter_state() each time
 * do_suspend_lowlevel() returns on the S3 path.
 */
37 uint32_t system_reset_counter = 1;
38 
parse_acpi_sleep(const char * s)39 static int __init parse_acpi_sleep(const char *s)
40 {
41     const char *ss;
42     unsigned int flag = 0;
43     int rc = 0;
44 
45     do {
46         ss = strchr(s, ',');
47         if ( !ss )
48             ss = strchr(s, '\0');
49 
50         if ( !cmdline_strcmp(s, "s3_bios") )
51             flag |= 1;
52         else if ( !cmdline_strcmp(s, "s3_mode") )
53             flag |= 2;
54         else
55             rc = -EINVAL;
56 
57         s = ss + 1;
58     } while ( *ss );
59 
60     acpi_video_flags |= flag;
61 
62     return rc;
63 }
64 custom_param("acpi_sleep", parse_acpi_sleep);
65 
/*
 * Taken with spin_trylock() in enter_state() and held until that function
 * returns, so a second sleep request made meanwhile fails with -EBUSY.
 */
66 static DEFINE_SPINLOCK(pm_lock);
67 
/*
 * Sleep parameters shared by the functions in this file.  acpi_enter_sleep()
 * stores the hypercall arguments here (sleep type / PM1 control values,
 * sleep_extended, sleep_state); the remaining fields that are read here
 * (wakeup_vector, vector_width, register blocks) are not written in the
 * code visible in this file.
 */
68 struct acpi_sleep_info acpi_sinfo;
69 
/* Defined outside this file; called by enter_state() on the S3 path. */
70 void do_suspend_lowlevel(void);
71 
/*
 * Progress markers for device suspend.  device_power_down() reports the last
 * stage that was suspended; device_power_up() resumes from that stage
 * downwards.  Enumerators are listed in suspend order.
 */
enum dev_power_saved
{
    SAVED_NONE,     /* Nothing suspended. */
    SAVED_CONSOLE,  /* Console suspended. */
    SAVED_TIME,     /* ... plus time. */
    SAVED_I8259A,   /* ... plus i8259A. */
    SAVED_IOAPIC,   /* ... plus IO-APIC. */
    SAVED_IOMMU,    /* ... plus IOMMU. */
    SAVED_LAPIC,    /* ... plus local APIC. */
    SAVED_ALL,      /* Every stage suspended. */
};
83 
device_power_down(void)84 static int device_power_down(void)
85 {
86     if ( console_suspend() )
87         return SAVED_NONE;
88 
89     if ( time_suspend() )
90         return SAVED_CONSOLE;
91 
92     if ( i8259A_suspend() )
93         return SAVED_TIME;
94 
95     /* ioapic_suspend cannot fail */
96     ioapic_suspend();
97 
98     if ( iommu_suspend() )
99         return SAVED_IOAPIC;
100 
101     if ( lapic_suspend() )
102         return SAVED_IOMMU;
103 
104     return SAVED_ALL;
105 }
106 
device_power_up(enum dev_power_saved saved)107 static void device_power_up(enum dev_power_saved saved)
108 {
109     switch ( saved )
110     {
111     case SAVED_ALL:
112     case SAVED_LAPIC:
113         lapic_resume();
114         /* fall through */
115     case SAVED_IOMMU:
116         iommu_resume();
117         /* fall through */
118     case SAVED_IOAPIC:
119         ioapic_resume();
120         /* fall through */
121     case SAVED_I8259A:
122         i8259A_resume();
123         /* fall through */
124     case SAVED_TIME:
125         time_resume();
126         /* fall through */
127     case SAVED_CONSOLE:
128         console_resume();
129         /* fall through */
130     case SAVED_NONE:
131         break;
132     default:
133         BUG();
134         break;
135     }
136 }
137 
freeze_domains(void)138 static void freeze_domains(void)
139 {
140     struct domain *d;
141 
142     rcu_read_lock(&domlist_read_lock);
143     /*
144      * Note that we iterate in order of domain-id. Hence we will pause dom0
145      * first which is required for correctness (as only dom0 can add domains to
146      * the domain list). Otherwise we could miss concurrently-created domains.
147      */
148     for_each_domain ( d )
149         domain_pause(d);
150     rcu_read_unlock(&domlist_read_lock);
151 
152     scheduler_disable();
153 }
154 
thaw_domains(void)155 static void thaw_domains(void)
156 {
157     struct domain *d;
158 
159     scheduler_enable();
160 
161     rcu_read_lock(&domlist_read_lock);
162     for_each_domain ( d )
163     {
164         restore_vcpu_affinity(d);
165         domain_unpause(d);
166     }
167     rcu_read_unlock(&domlist_read_lock);
168 }
169 
/*
 * Program the firmware wakeup vector ahead of entering S3.
 *
 * Does nothing for states other than ACPI_STATE_S3.  For S3 the location
 * given by acpi_sinfo.wakeup_vector is mapped and, unless running in a TBoot
 * measured environment, the physical address of wakeup_start is stored
 * there using the width recorded in acpi_sinfo.vector_width.
 */
static void acpi_sleep_prepare(u32 state)
{
    void *vector_va;

    if ( state != ACPI_STATE_S3 )
        return;

    /*
     * NOTE(review): the mapping result is used below without a NULL check -
     * confirm __acpi_map_table() cannot fail at this point.
     */
    vector_va = __acpi_map_table(acpi_sinfo.wakeup_vector, sizeof(uint64_t));

    /* TBoot will set resume vector itself (when it is safe to do so). */
    if ( tboot_in_measured_env() )
        return;

    if ( acpi_sinfo.vector_width == 32 )
    {
        uint32_t *vector32 = vector_va;

        *vector32 = bootsym_phys(wakeup_start);
    }
    else
    {
        uint64_t *vector64 = vector_va;

        *vector64 = bootsym_phys(wakeup_start);
    }
}
189 
/*
 * Counterpart of acpi_sleep_prepare(), called by enter_state() on the way
 * back up.  Currently there is nothing to do.
 */
static void acpi_sleep_post(u32 state)
{
}
191 
192 /* Main interface to do xen specific suspend/resume */
enter_state(u32 state)193 static int enter_state(u32 state)
194 {
195     unsigned long flags;
196     int error;
197     struct cpu_info *ci;
198 
199     if ( (state <= ACPI_STATE_S0) || (state > ACPI_S_STATES_MAX) )
200         return -EINVAL;
201 
202     if ( !spin_trylock(&pm_lock) )
203         return -EBUSY;
204 
205     BUG_ON(system_state != SYS_STATE_active);
206     BUG_ON(!is_idle_vcpu(current));
207     BUG_ON(smp_processor_id() != 0);
208     system_state = SYS_STATE_suspend;
209 
210     printk(XENLOG_INFO "Preparing system for ACPI S%d state.\n", state);
211 
212     freeze_domains();
213 
214     acpi_dmar_reinstate();
215 
216     if ( (error = disable_nonboot_cpus()) )
217     {
218         system_state = SYS_STATE_resume;
219         goto enable_cpu;
220     }
221 
222     cpufreq_del_cpu(0);
223 
224     hvm_cpu_down();
225 
226     acpi_sleep_prepare(state);
227 
228     watchdog_disable();
229     console_start_sync();
230     printk("Entering ACPI S%d state.\n", state);
231 
232     local_irq_save(flags);
233     spin_debug_disable();
234 
235     if ( (error = device_power_down()) != SAVED_ALL )
236     {
237         printk(XENLOG_ERR "Some devices failed to power down.");
238         system_state = SYS_STATE_resume;
239         device_power_up(error);
240         console_end_sync();
241         watchdog_enable();
242         error = -EIO;
243         goto done;
244     }
245     else
246         error = 0;
247 
248     ci = get_cpu_info();
249     spec_ctrl_enter_idle(ci);
250     /* Avoid NMI/#MC using MSR_SPEC_CTRL until we've reloaded microcode. */
251     ci->spec_ctrl_flags &= ~SCF_ist_wrmsr;
252 
253     ACPI_FLUSH_CPU_CACHE();
254 
255     switch ( state )
256     {
257     case ACPI_STATE_S3:
258         do_suspend_lowlevel();
259         system_reset_counter++;
260         error = tboot_s3_resume();
261         break;
262     case ACPI_STATE_S5:
263         acpi_enter_sleep_state(ACPI_STATE_S5);
264         break;
265     default:
266         error = -EINVAL;
267         break;
268     }
269 
270     system_state = SYS_STATE_resume;
271 
272     /* Restore EFER from cached value. */
273     write_efer(read_efer());
274 
275     device_power_up(SAVED_ALL);
276 
277     mcheck_init(&boot_cpu_data, false);
278 
279     printk(XENLOG_INFO "Finishing wakeup from ACPI S%d state.\n", state);
280 
281     if ( (state == ACPI_STATE_S3) && error )
282         tboot_s3_error(error);
283 
284     console_end_sync();
285     watchdog_enable();
286 
287     microcode_update_one();
288 
289     if ( !recheck_cpu_features(0) )
290         panic("Missing previously available feature(s)\n");
291 
292     /* Re-enabled default NMI/#MC use of MSR_SPEC_CTRL. */
293     ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_ist_wrmsr);
294     spec_ctrl_exit_idle(ci);
295 
296     if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) )
297         wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl);
298 
299     /* (re)initialise SYSCALL/SYSENTER state, amongst other things. */
300     percpu_traps_init();
301 
302  done:
303     spin_debug_enable();
304     local_irq_restore(flags);
305     acpi_sleep_post(state);
306     if ( hvm_cpu_up() )
307         BUG();
308     cpufreq_add_cpu(0);
309 
310  enable_cpu:
311     mtrr_aps_sync_begin();
312     enable_nonboot_cpus();
313     mtrr_aps_sync_end();
314     iommu_adjust_irq_affinities();
315     acpi_dmar_zap();
316     thaw_domains();
317     system_state = SYS_STATE_active;
318     spin_unlock(&pm_lock);
319     return error;
320 }
321 
enter_state_helper(void * data)322 static long enter_state_helper(void *data)
323 {
324     struct acpi_sleep_info *sinfo = (struct acpi_sleep_info *)data;
325     return enter_state(sinfo->sleep_state);
326 }
327 
328 /*
329  * Dom0 issues this hypercall in place of writing pm1a_cnt. Xen then
330  * takes over the control and put the system into sleep state really.
331  */
acpi_enter_sleep(struct xenpf_enter_acpi_sleep * sleep)332 int acpi_enter_sleep(struct xenpf_enter_acpi_sleep *sleep)
333 {
334     if ( sleep->flags & XENPF_ACPI_SLEEP_EXTENDED )
335     {
336         if ( !acpi_sinfo.sleep_control.address ||
337              !acpi_sinfo.sleep_status.address )
338             return -EPERM;
339 
340         if ( sleep->flags & ~XENPF_ACPI_SLEEP_EXTENDED )
341             return -EINVAL;
342 
343         if ( sleep->val_a > ACPI_SLEEP_TYPE_MAX ||
344              (sleep->val_b != ACPI_SLEEP_TYPE_INVALID &&
345               sleep->val_b > ACPI_SLEEP_TYPE_MAX) )
346             return -ERANGE;
347 
348         acpi_sinfo.sleep_type_a = sleep->val_a;
349         acpi_sinfo.sleep_type_b = sleep->val_b;
350 
351         acpi_sinfo.sleep_extended = 1;
352     }
353 
354     else if ( !acpi_sinfo.pm1a_cnt_blk.address )
355         return -EPERM;
356 
357     /* Sanity check */
358     else if ( sleep->val_b &&
359               ((sleep->val_a ^ sleep->val_b) & ACPI_BITMASK_SLEEP_ENABLE) )
360     {
361         gdprintk(XENLOG_ERR, "Mismatched pm1a/pm1b setting\n");
362         return -EINVAL;
363     }
364 
365     else if ( sleep->flags )
366         return -EINVAL;
367 
368     else
369     {
370         acpi_sinfo.pm1a_cnt_val = sleep->val_a;
371         acpi_sinfo.pm1b_cnt_val = sleep->val_b;
372         acpi_sinfo.sleep_extended = 0;
373     }
374 
375     acpi_sinfo.sleep_state = sleep->sleep_state;
376 
377     return continue_hypercall_on_cpu(0, enter_state_helper, &acpi_sinfo);
378 }
379 
acpi_get_wake_status(void)380 static int acpi_get_wake_status(void)
381 {
382     uint32_t val;
383     acpi_status status;
384 
385     if ( acpi_sinfo.sleep_extended )
386     {
387         status = acpi_hw_register_read(ACPI_REGISTER_SLEEP_STATUS, &val);
388 
389         return ACPI_FAILURE(status) ? 0 : val & ACPI_X_WAKE_STATUS;
390     }
391 
392     /* Wake status is the 15th bit of PM1 status register. (ACPI spec 3.0) */
393     status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &val);
394     if ( ACPI_FAILURE(status) )
395         return 0;
396 
397     val &= ACPI_BITMASK_WAKE_STATUS;
398     val >>= ACPI_BITPOSITION_WAKE_STATUS;
399     return val;
400 }
401 
/*
 * Hand a sleep request over to TBoot.
 *
 * The ACPI register blocks, PM1 control values, wakeup vector details and
 * the physical address of wakeup_start are copied from acpi_sinfo into
 * g_tboot_shared->acpi_sinfo, then tboot_shutdown() is called with the
 * shutdown type matching @sleep_state (S3, S4 or S5).
 *
 * For any other @sleep_state the function returns after the copy without
 * calling tboot_shutdown().
 */
static void tboot_sleep(u8 sleep_state)
{
    uint32_t shutdown_type;

/* Field-wise copy of a generic address structure (see comment below). */
#define TB_COPY_GAS(tbg, g)                         \
    do {                                            \
        (tbg).space_id = (g).space_id;              \
        (tbg).bit_width = (g).bit_width;            \
        (tbg).bit_offset = (g).bit_offset;          \
        (tbg).access_width = (g).access_width;      \
        (tbg).address = (g).address;                \
    } while ( 0 )

    /* sizes are not same (due to packing) so copy each one */
    TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1a_cnt_blk,
                acpi_sinfo.pm1a_cnt_blk);
    TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1b_cnt_blk,
                acpi_sinfo.pm1b_cnt_blk);
    TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1a_evt_blk,
                acpi_sinfo.pm1a_evt_blk);
    TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1b_evt_blk,
                acpi_sinfo.pm1b_evt_blk);

#undef TB_COPY_GAS

    g_tboot_shared->acpi_sinfo.pm1a_cnt_val = acpi_sinfo.pm1a_cnt_val;
    g_tboot_shared->acpi_sinfo.pm1b_cnt_val = acpi_sinfo.pm1b_cnt_val;
    g_tboot_shared->acpi_sinfo.wakeup_vector = acpi_sinfo.wakeup_vector;
    g_tboot_shared->acpi_sinfo.vector_width = acpi_sinfo.vector_width;
    g_tboot_shared->acpi_sinfo.kernel_s3_resume_vector =
                                              bootsym_phys(wakeup_start);

    switch ( sleep_state )
    {
    case ACPI_STATE_S3:
        shutdown_type = TB_SHUTDOWN_S3;
        break;
    case ACPI_STATE_S4:
        shutdown_type = TB_SHUTDOWN_S4;
        break;
    case ACPI_STATE_S5:
        shutdown_type = TB_SHUTDOWN_S5;
        break;
    default:
        /* No TBoot shutdown type for this state. */
        return;
    }

    tboot_shutdown(shutdown_type);
}
446 
447 /* System is really put into sleep state by this stub */
acpi_enter_sleep_state(u8 sleep_state)448 acpi_status acpi_enter_sleep_state(u8 sleep_state)
449 {
450     acpi_status status;
451 
452     if ( tboot_in_measured_env() )
453     {
454         tboot_sleep(sleep_state);
455         printk(XENLOG_ERR "TBOOT failed entering s3 state\n");
456         return_ACPI_STATUS(AE_ERROR);
457     }
458 
459     ACPI_FLUSH_CPU_CACHE();
460 
461     if ( acpi_sinfo.sleep_extended )
462     {
463         /*
464          * Set the SLP_TYP and SLP_EN bits.
465          *
466          * Note: We only use the first value returned by the \_Sx method
467          * (acpi_sinfo.sleep_type_a) - As per ACPI specification.
468          */
469         u8 sleep_type_value =
470             ((acpi_sinfo.sleep_type_a << ACPI_X_SLEEP_TYPE_POSITION) &
471              ACPI_X_SLEEP_TYPE_MASK) | ACPI_X_SLEEP_ENABLE;
472 
473         status = acpi_hw_register_write(ACPI_REGISTER_SLEEP_CONTROL,
474                                         sleep_type_value);
475     }
476     else
477     {
478         status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
479                                         acpi_sinfo.pm1a_cnt_val);
480         if ( !ACPI_FAILURE(status) && acpi_sinfo.pm1b_cnt_blk.address )
481             status = acpi_hw_register_write(ACPI_REGISTER_PM1B_CONTROL,
482                                             acpi_sinfo.pm1b_cnt_val);
483     }
484 
485     if ( ACPI_FAILURE(status) )
486         return_ACPI_STATUS(AE_ERROR);
487 
488     /* Wait until we enter sleep state, and spin until we wake */
489     while ( !acpi_get_wake_status() )
490         continue;
491 
492     return_ACPI_STATUS(AE_OK);
493 }
494