/*  Generic MTRR (Memory Type Range Register) driver.

    Copyright (C) 1997-2000  Richard Gooch
    Copyright (c) 2002       Patrick Mochel

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, see <http://www.gnu.org/licenses/>.

    Richard Gooch may be reached by email at rgooch@atnf.csiro.au
    The postal address is:
      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.

    Source: "Pentium Pro Family Developer's Manual, Volume 3:
    Operating System Writer's Guide" (Intel document number 242692),
    section 11.11.7

    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
    on 6-7 March 2002.
    Source: Intel Architecture Software Developers Manual, Volume 3:
    System Programming Guide; Section 9.11. (1997 edition - PPro).
*/

#include <xen/init.h>
#include <xen/lib.h>
#include <xen/smp.h>
#include <xen/spinlock.h>
#include <asm/atomic.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include "mtrr.h"

/* No blocking mutexes in Xen. Spin instead. */
#define DEFINE_MUTEX(_m) DEFINE_SPINLOCK(_m)
#define mutex_lock(_m) spin_lock(_m)
#define mutex_unlock(_m) spin_unlock(_m)
#define dump_stack() ((void)0)
#define get_cpu() smp_processor_id()
#define put_cpu() do {} while (0)

u32 __read_mostly num_var_ranges = 0;

unsigned int *__read_mostly usage_table;
static DEFINE_MUTEX(mtrr_mutex);

u64 __read_mostly size_or_mask;
u64 __read_mostly size_and_mask;

const struct mtrr_ops *__read_mostly mtrr_if = NULL;

static void set_mtrr(unsigned int reg, unsigned long base,
                     unsigned long size, mtrr_type type);

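/*
 * Names for the architectural memory type encodings used in MTRRs, indexed
 * by type value.  Encodings 2 and 3 are reserved, hence the "?" entries.
 */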
static const char *const mtrr_strings[MTRR_NUM_TYPES] =
{
    "uncachable",      /* 0 */
    "write-combining", /* 1 */
    "?",               /* 2 */
    "?",               /* 3 */
    "write-through",   /* 4 */
    "write-protect",   /* 5 */
    "write-back",      /* 6 */
};

static const char *mtrr_attrib_to_str(int x)
{
    return (x <= 6) ? mtrr_strings[x] : "?";
}

/* Returns non-zero if we have the write-combining memory type */
static int have_wrcomb(void)
{
    return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
}

/* This function determines the number of variable MTRRs */
static void __init set_num_var_ranges(void)
{
    unsigned long config = 0;

    if (use_intel()) {
        rdmsrl(MSR_MTRRcap, config);
    } else if (is_cpu(AMD))
        config = 2;
    else if (is_cpu(CENTAUR))
        config = 8;
    num_var_ranges = MASK_EXTR(config, MTRRcap_VCNT);
}

static void __init init_table(void)
{
    int i, max;

    max = num_var_ranges;
    if ((usage_table = xmalloc_array(unsigned int, max)) == NULL) {
        printk(KERN_ERR "mtrr: could not allocate\n");
        return;
    }
    for (i = 0; i < max; i++)
        usage_table[i] = 1;
}

struct set_mtrr_data {
    atomic_t      count;
    atomic_t      gate;
    unsigned long smp_base;
    unsigned long smp_size;
    unsigned int  smp_reg;
    mtrr_type     smp_type;
};

/* As per the IA-32 SDM Vol. 3, section 10.11.8 ("MTRR Considerations in MP
 * Systems"), MTRR updates must be synchronized across all processors.
 * This flag avoids synchronizing all the CPUs while booting each CPU.
 * At boot and resume time, this flag is turned on in mtrr_aps_sync_begin().
 * While it is set, the MTRR initialization (and the all-CPU sync-up) in
 * mtrr_ap_init() is skipped while booting each CPU.
 * After all the CPUs have come up, mtrr_aps_sync_end() synchronizes all the
 * CPUs and updates the MTRRs on all of them.  Then this flag is turned off.
 */
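/*
 * Expected sequence at boot/resume (a sketch of the above):
 * mtrr_aps_sync_begin() sets the flag, each AP's mtrr_ap_init() then
 * returns early, and mtrr_aps_sync_end() finally performs one global
 * set_mtrr(~0U, 0, 0, 0) rendezvous and clears the flag again.
 */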
int hold_mtrr_updates_on_aps;

static void ipi_handler(void *info)
/*  [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
    [RETURNS] Nothing.
*/
{
    struct set_mtrr_data *data = info;
    unsigned long flags;

    local_irq_save(flags);

    atomic_dec(&data->count);
    while (!atomic_read(&data->gate))
        cpu_relax();

    /* The master has cleared me to execute */
    if (data->smp_reg == ~0U) /* update all mtrr registers */
        /* At cpu hot-add time this will reinitialize the mtrr
         * registers on the existing cpus. It is ok. */
        mtrr_if->set_all();
    else /* single mtrr register update */
        mtrr_if->set(data->smp_reg, data->smp_base,
                     data->smp_size, data->smp_type);

    atomic_dec(&data->count);
    while (atomic_read(&data->gate))
        cpu_relax();

    atomic_dec(&data->count);
    local_irq_restore(flags);
}

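/*
 * Two requested memory types may share an address range when the effective
 * type of the overlap is still well defined: UNCACHABLE is compatible with
 * everything (UC always wins when variable ranges overlap), and a
 * WRTHROUGH/WRBACK mix is allowed because the overlap resolves to WRTHROUGH.
 */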
static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
    return type1 == MTRR_TYPE_UNCACHABLE ||
           type2 == MTRR_TYPE_UNCACHABLE ||
           (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
           (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
}

/**
 * set_mtrr - update mtrrs on all processors
 * @reg: mtrr in question
 * @base: mtrr base
 * @size: mtrr size
 * @type: mtrr type
 *
 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
 *
 * 1. Send IPI to do the following:
 * 2. Disable Interrupts
 * 3. Wait for all procs to do so
 * 4. Enter no-fill cache mode
 * 5. Flush caches
 * 6. Clear PGE bit
 * 7. Flush all TLBs
 * 8. Disable all range registers
 * 9. Update the MTRRs
 * 10. Enable all range registers
 * 11. Flush all TLBs and caches again
 * 12. Enter normal cache mode and reenable caching
 * 13. Set PGE
 * 14. Wait for buddies to catch up
 * 15. Enable interrupts.
 *
 * What does that mean for us? Well, first we set data.count to the number
 * of other online CPUs. As each of them disables interrupts, it decrements
 * the count once. We wait until it hits 0 and proceed. We set the data.gate
 * flag and reset data.count. Meanwhile, the other CPUs are waiting for that
 * flag to be set. Once it is, each CPU goes through the transition of
 * updating MTRRs. The CPU vendors may each do it differently, so we call the
 * mtrr_if->set() callback and let it take care of that. When they're done,
 * they again decrement data->count and wait for data.gate to be reset.
 * When we finish, we wait for data.count to hit 0 and toggle the data.gate
 * flag. Everyone then enables interrupts and we all continue on.
 *
 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 * becomes nops.
 */
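/*
 * A rough sketch of the rendezvous below (the master is the CPU calling
 * set_mtrr(); every other online CPU runs ipi_handler()):
 *
 *     master                              each other CPU
 *     ------                              --------------
 *     count = nr_cpus, gate = 0
 *     send IPIs, disable IRQs             disable IRQs, count--
 *     wait for count == 0                 spin until gate == 1
 *     count = nr_cpus, gate = 1
 *     update MTRR(s)                      update MTRR(s), count--
 *     wait for count == 0                 spin until gate == 0
 *     count = nr_cpus, gate = 0
 *     wait for count == 0                 count--, restore IRQs
 *     restore IRQs
 */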
static void set_mtrr(unsigned int reg, unsigned long base,
                     unsigned long size, mtrr_type type)
{
    cpumask_t allbutself;
    unsigned int nr_cpus;
    struct set_mtrr_data data;
    unsigned long flags;

    cpumask_andnot(&allbutself, &cpu_online_map,
                   cpumask_of(smp_processor_id()));
    nr_cpus = cpumask_weight(&allbutself);

    data.smp_reg = reg;
    data.smp_base = base;
    data.smp_size = size;
    data.smp_type = type;
    atomic_set(&data.count, nr_cpus);
    atomic_set(&data.gate, 0);

    /* Start the ball rolling on other CPUs */
    on_selected_cpus(&allbutself, ipi_handler, &data, 0);

    local_irq_save(flags);

    while (atomic_read(&data.count))
        cpu_relax();

    /* ok, reset count and toggle gate */
    atomic_set(&data.count, nr_cpus);
    smp_wmb();
    atomic_set(&data.gate, 1);

    /* do our MTRR business */

    /* HACK!
     * We use this same function to initialize the mtrrs on boot.
     * The state of the boot cpu's mtrrs has been saved, and we want
     * to replicate that across all the APs.
     * If we're doing that, @reg is set to something special...
     */
    if (reg == ~0U) /* update all mtrr registers */
        /* at boot or resume time, this will reinitialize the mtrrs on
         * the bp. It is ok. */
        mtrr_if->set_all();
    else /* update the single mtrr register */
        mtrr_if->set(reg, base, size, type);

    /* wait for the others */
    while (atomic_read(&data.count))
        cpu_relax();

    atomic_set(&data.count, nr_cpus);
    smp_wmb();
    atomic_set(&data.gate, 0);

    /*
     * Wait here for everyone to have seen the gate change
     * so we're the last ones to touch 'data'
     */
    while (atomic_read(&data.count))
        cpu_relax();

    local_irq_restore(flags);
}

/**
 * mtrr_add_page - Add a memory type region
 * @base: Physical base address of region in pages (in units of 4 kB!)
 * @size: Physical size of region in pages (4 kB)
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non-Intel processors. This function allows drivers to request that an
 * MTRR be added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */

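/*
 * A minimal usage sketch (hypothetical addresses, not taken from this file):
 * map a 16 MiB frame buffer at 0xd0000000 as write-combining, with base and
 * size given in units of 4 kB pages:
 *
 *     int reg = mtrr_add_page(0xd0000000UL >> PAGE_SHIFT,
 *                             0x1000000UL >> PAGE_SHIFT,
 *                             MTRR_TYPE_WRCOMB, 1);
 *
 * A negative return means all ranges are in use or the type is unsupported;
 * a non-negative return is the register number, to be kept only as a cookie.
 */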
int mtrr_add_page(unsigned long base, unsigned long size,
                  unsigned int type, char increment)
{
    int i, replace, error;
    mtrr_type ltype;
    unsigned long lbase, lsize;

    if (!mtrr_if)
        return -ENXIO;

    if ((error = mtrr_if->validate_add_page(base, size, type)))
        return error;

    if (type >= MTRR_NUM_TYPES) {
        printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
        return -EINVAL;
    }

    /* If the type is WC, check that this processor supports it */
    if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
        printk(KERN_WARNING
               "mtrr: your processor doesn't support write-combining\n");
        return -EOPNOTSUPP;
    }

    if (!size) {
        printk(KERN_WARNING "mtrr: zero sized request\n");
        return -EINVAL;
    }

    if ((base | (base + size - 1)) >> (paddr_bits - PAGE_SHIFT)) {
        printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n");
        return -EINVAL;
    }

    error = -EINVAL;
    replace = -1;

    /* Search for existing MTRR */
    mutex_lock(&mtrr_mutex);
    for (i = 0; i < num_var_ranges; ++i) {
        mtrr_if->get(i, &lbase, &lsize, &ltype);
        if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase)
            continue;
        /* At this point we know there is some kind of overlap/enclosure */
        if (base < lbase || base + size - 1 > lbase + lsize - 1) {
            if (base <= lbase && base + size - 1 >= lbase + lsize - 1) {
                /* New region encloses an existing region */
                if (type == ltype) {
                    replace = replace == -1 ? i : -2;
                    continue;
                }
                else if (types_compatible(type, ltype))
                    continue;
            }
            printk(KERN_WARNING
                   "mtrr: %#lx000,%#lx000 overlaps existing"
                   " %#lx000,%#lx000\n", base, size, lbase, lsize);
            goto out;
        }
        /* New region is enclosed by an existing region */
        if (ltype != type) {
            if (types_compatible(type, ltype))
                continue;
            printk(KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
                   base, size, mtrr_attrib_to_str(ltype),
                   mtrr_attrib_to_str(type));
            goto out;
        }
        if (increment)
            ++usage_table[i];
        error = i;
        goto out;
    }
    /* Search for an empty MTRR */
    i = mtrr_if->get_free_region(base, size, replace);
    if (i >= 0) {
        set_mtrr(i, base, size, type);
        if (likely(replace < 0))
            usage_table[i] = 1;
        else {
            usage_table[i] = usage_table[replace] + !!increment;
            if (unlikely(replace != i)) {
                set_mtrr(replace, 0, 0, 0);
                usage_table[replace] = 0;
            }
        }
    } else
        printk(KERN_INFO "mtrr: no more MTRRs available\n");
    error = i;
 out:
    mutex_unlock(&mtrr_mutex);
    return error;
}

static int mtrr_check(unsigned long base, unsigned long size)
{
    if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
        printk(KERN_WARNING
               "mtrr: size and base must be multiples of 4 kiB\n");
        printk(KERN_DEBUG
               "mtrr: size: %#lx  base: %#lx\n", size, base);
        dump_stack();
        return -1;
    }
    return 0;
}

/**
 * mtrr_add - Add a memory type region
 * @base: Physical base address of region
 * @size: Physical size of region
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non-Intel processors. This function allows drivers to request that an
 * MTRR be added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */

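/*
 * A minimal usage sketch (hypothetical addresses): base and size are byte
 * values here and must be multiples of 4 kB; the returned register number
 * is only meaningful as a cookie for a later mtrr_del().  Note that in this
 * tree both functions are __init, so such a caller would itself have to run
 * at initialization time:
 *
 *     int reg = mtrr_add(0xd0000000UL, 0x1000000UL, MTRR_TYPE_WRCOMB, 1);
 *     ...
 *     if (reg >= 0)
 *         mtrr_del(reg, 0xd0000000UL, 0x1000000UL);
 */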
int __init
mtrr_add(unsigned long base, unsigned long size, unsigned int type,
         char increment)
{
    if (mtrr_check(base, size))
        return -EINVAL;
    return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
                         increment);
}

/**
 * mtrr_del_page - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
 */

int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
    int i, max;
    mtrr_type ltype;
    unsigned long lbase, lsize;
    int error = -EINVAL;

    if (!mtrr_if)
        return -ENXIO;

    max = num_var_ranges;
    mutex_lock(&mtrr_mutex);
    if (reg < 0) {
        /* Search for existing MTRR */
        for (i = 0; i < max; ++i) {
            mtrr_if->get(i, &lbase, &lsize, &ltype);
            if (lbase == base && lsize == size) {
                reg = i;
                break;
            }
        }
        if (reg < 0) {
            printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n",
                   base, size);
            goto out;
        }
    }
    if (reg >= max) {
        printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
        goto out;
    }
    mtrr_if->get(reg, &lbase, &lsize, &ltype);
    if (lsize < 1) {
        printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
        goto out;
    }
    if (usage_table[reg] < 1) {
        printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
        goto out;
    }
    if (--usage_table[reg] < 1)
        set_mtrr(reg, 0, 0, 0);
    error = reg;
 out:
    mutex_unlock(&mtrr_mutex);
    return error;
}
/**
 * mtrr_del - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
 */

int __init
mtrr_del(int reg, unsigned long base, unsigned long size)
{
    if (mtrr_check(base, size))
        return -EINVAL;
    return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}

/* The suspend/resume methods are only needed for CPUs without MTRR support;
 * CPUs using the generic MTRR driver don't require this.
 */
struct mtrr_value {
    mtrr_type     ltype;
    unsigned long lbase;
    unsigned long lsize;
};

/**
 * mtrr_bp_init - initialize mtrrs on the boot CPU
 *
 * This needs to be called early; before any of the other CPUs are
 * initialized (i.e. before smp_init()).
 *
 */
void __init mtrr_bp_init(void)
{
    if (cpu_has_mtrr) {
        mtrr_if = &generic_mtrr_ops;
        size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) - 1);
        size_and_mask = ~size_or_mask & 0xfffff00000ULL;
    }

    if (mtrr_if) {
        set_num_var_ranges();
        init_table();
        if (use_intel())
            get_mtrr_state();
    }
}

void mtrr_ap_init(void)
{
    if (!mtrr_if || !use_intel() || hold_mtrr_updates_on_aps)
        return;
    /*
     * Ideally we should hold mtrr_mutex here to avoid MTRR entries being
     * changed, but this routine is called at CPU boot time, and holding
     * the lock there would break things. This routine is called in two
     * cases: 1. very early during software resume, when there absolutely
     * are no MTRR entry changes; 2. at CPU hot-add time. We let
     * mtrr_add/del_page hold the cpuhotplug lock to prevent MTRR entry
     * changes.
     */
    set_mtrr(~0U, 0, 0, 0);
}

/**
 * Save current fixed-range MTRR state of the BSP
 */
void mtrr_save_state(void)
{
    int cpu = get_cpu();

    if (cpu == 0)
        mtrr_save_fixed_ranges(NULL);
    else
        on_selected_cpus(cpumask_of(0), mtrr_save_fixed_ranges, NULL, 1);
    put_cpu();
}

void mtrr_aps_sync_begin(void)
{
    if (!use_intel())
        return;
    hold_mtrr_updates_on_aps = 1;
}

void mtrr_aps_sync_end(void)
{
    if (!use_intel())
        return;
    set_mtrr(~0U, 0, 0, 0);
    hold_mtrr_updates_on_aps = 0;
}

void mtrr_bp_restore(void)
{
    if (!use_intel())
        return;
    mtrr_if->set_all();
}

static int __init mtrr_init_finialize(void)
{
    if (!mtrr_if)
        return 0;
    if (use_intel())
        mtrr_state_warn();
    return 0;
}
__initcall(mtrr_init_finialize);