// SPDX-License-Identifier: LGPL-2.0+
/*
 * Generic MTRR (Memory Type Range Register) driver.
 *
 * Copyright (C) 1997-2000 Richard Gooch
 * Copyright (c) 2002 Patrick Mochel
 *
 * Richard Gooch may be reached by email at rgooch@atnf.csiro.au
 * The postal address is:
 * Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
 *
 * Source: "Pentium Pro Family Developer's Manual, Volume 3:
 * Operating System Writer's Guide" (Intel document number 242692),
 * section 11.11.7
 *
 * This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
 * on 6-7 March 2002.
 * Source: Intel Architecture Software Developers Manual, Volume 3:
 * System Programming Guide; Section 9.11. (1997 edition - PPro).
 */

#include <linux/types.h> /* FIXME: kvm_para.h needs this */

#include <linux/stop_machine.h>
#include <linux/kvm_para.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/sort.h>
#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
#include <linux/rcupdate.h>

#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/memtype.h>

#include "mtrr.h"

static_assert(X86_MEMTYPE_UC == MTRR_TYPE_UNCACHABLE);
static_assert(X86_MEMTYPE_WC == MTRR_TYPE_WRCOMB);
static_assert(X86_MEMTYPE_WT == MTRR_TYPE_WRTHROUGH);
static_assert(X86_MEMTYPE_WP == MTRR_TYPE_WRPROT);
static_assert(X86_MEMTYPE_WB == MTRR_TYPE_WRBACK);

/* arch_phys_wc_add returns an MTRR register index plus this offset. */
#define MTRR_TO_PHYS_WC_OFFSET 1000
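/*
 * Example: if a write-combining request lands in variable MTRR 3,
 * arch_phys_wc_add() returns 1003, and arch_phys_wc_del(1003) frees
 * MTRR 3 again. The offset keeps valid handles distinct from raw
 * register indices and from the 0 returned when PAT makes MTRRs moot.
 */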

u32 num_var_ranges;

unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
DEFINE_MUTEX(mtrr_mutex);

const struct mtrr_ops *mtrr_if;

/* Returns non-zero if we have the write-combining memory type */
static int have_wrcomb(void)
{
	struct pci_dev *dev;

	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
	if (dev != NULL) {
		/*
		 * ServerWorks LE chipsets < rev 6 have problems with
		 * write-combining. Don't allow it and leave room for other
		 * chipsets to be tagged.
		 */
		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
		    dev->revision <= 5) {
			pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
			pci_dev_put(dev);
			return 0;
		}
		/*
		 * Intel 450NX errata #23. Non-ascending cacheline evictions
		 * to write-combining memory may result in data corruption.
		 */
		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
			pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
			pci_dev_put(dev);
			return 0;
		}
		pci_dev_put(dev);
	}
	return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
}

static void __init init_table(void)
{
	int i, max;

	max = num_var_ranges;
	for (i = 0; i < max; i++)
		mtrr_usage_table[i] = 1;
}

struct set_mtrr_data {
	unsigned long	smp_base;
	unsigned long	smp_size;
	unsigned int	smp_reg;
	mtrr_type	smp_type;
};

/**
 * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
 * by all the CPUs.
 * @info: pointer to mtrr configuration data
 *
 * Returns zero; the return value is ignored by the stop_machine() core.
 */
static int mtrr_rendezvous_handler(void *info)
{
	struct set_mtrr_data *data = info;

	mtrr_if->set(data->smp_reg, data->smp_base,
		     data->smp_size, data->smp_type);
	return 0;
}

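/*
 * Overlap policy in a nutshell: UC combined with any type resolves to
 * UC, so a UC overlap is always safe to leave in place; WT and WB may
 * overlap each other because the effective type degrades to WT. Any
 * other mix (e.g. WC over WB) is rejected by the callers below.
 */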
static inline int types_compatible(mtrr_type type1, mtrr_type type2)
{
	return type1 == MTRR_TYPE_UNCACHABLE ||
	       type2 == MTRR_TYPE_UNCACHABLE ||
	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
}

/**
 * set_mtrr - update mtrrs on all processors
 * @reg: mtrr in question
 * @base: mtrr base
 * @size: mtrr size
 * @type: mtrr type
 *
 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
 *
 * 1. Queue work to do the following on all processors:
 * 2. Disable interrupts
 * 3. Wait for all procs to do so
 * 4. Enter no-fill cache mode
 * 5. Flush caches
 * 6. Clear PGE bit
 * 7. Flush all TLBs
 * 8. Disable all range registers
 * 9. Update the MTRRs
 * 10. Enable all range registers
 * 11. Flush all TLBs and caches again
 * 12. Enter normal cache mode and reenable caching
 * 13. Set PGE
 * 14. Wait for buddies to catch up
 * 15. Enable interrupts.
 *
 * What does that mean for us? Well, stop_machine() will ensure that
 * the rendezvous handler is started on each CPU. And in lockstep they
 * do the state transition of disabling interrupts, updating MTRRs
 * (the CPU vendors may each do it differently, so we call the
 * mtrr_if->set() callback and let them take care of it) and enabling
 * interrupts.
 *
 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 * becomes nops.
 */
static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size,
		     mtrr_type type)
{
	struct set_mtrr_data data = { .smp_reg = reg,
				      .smp_base = base,
				      .smp_size = size,
				      .smp_type = type
				    };

	stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);

	generic_rebuild_map();
}

/**
 * mtrr_add_page - Add a memory type region
 * @base: Physical base address of region in pages (in units of 4 kB!)
 * @size: Physical size of region in pages (4 kB)
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non-Intel processors. This function allows drivers to request that an
 * MTRR be added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add_page(unsigned long base, unsigned long size,
		  unsigned int type, bool increment)
{
	unsigned long lbase, lsize;
	int i, replace, error;
	mtrr_type ltype;

	if (!mtrr_enabled())
		return -ENXIO;

	error = mtrr_if->validate_add_page(base, size, type);
	if (error)
		return error;

	if (type >= MTRR_NUM_TYPES) {
		pr_warn("type: %u invalid\n", type);
		return -EINVAL;
	}

	/* If the type is WC, check that this processor supports it */
	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
		pr_warn("your processor doesn't support write-combining\n");
		return -ENOSYS;
	}

	if (!size) {
		pr_warn("zero sized request\n");
		return -EINVAL;
	}

	if ((base | (base + size - 1)) >>
	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
		pr_warn("base or size exceeds the MTRR width\n");
		return -EINVAL;
	}

	error = -EINVAL;
	replace = -1;

	/* No CPU hotplug when we change MTRR entries */
	cpus_read_lock();

	/* Search for existing MTRR */
	mutex_lock(&mtrr_mutex);
	for (i = 0; i < num_var_ranges; ++i) {
		mtrr_if->get(i, &lbase, &lsize, &ltype);
		if (!lsize || base > lbase + lsize - 1 ||
		    base + size - 1 < lbase)
			continue;
		/*
		 * At this point we know there is some kind of
		 * overlap/enclosure
		 */
		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
			if (base <= lbase &&
			    base + size - 1 >= lbase + lsize - 1) {
				/* New region encloses an existing region */
				if (type == ltype) {
					replace = replace == -1 ? i : -2;
					continue;
				} else if (types_compatible(type, ltype))
					continue;
			}
			pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n",
				base, size, lbase, lsize);
			goto out;
		}
		/* New region is enclosed by an existing region */
		if (ltype != type) {
			if (types_compatible(type, ltype))
				continue;
			pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
				base, size, mtrr_attrib_to_str(ltype),
				mtrr_attrib_to_str(type));
			goto out;
		}
		if (increment)
			++mtrr_usage_table[i];
		error = i;
		goto out;
	}
	/* Search for an empty MTRR */
	i = mtrr_if->get_free_region(base, size, replace);
	if (i >= 0) {
		set_mtrr(i, base, size, type);
		if (likely(replace < 0)) {
			mtrr_usage_table[i] = 1;
		} else {
			mtrr_usage_table[i] = mtrr_usage_table[replace];
			if (increment)
				mtrr_usage_table[i]++;
			if (unlikely(replace != i)) {
				set_mtrr(replace, 0, 0, 0);
				mtrr_usage_table[replace] = 0;
			}
		}
	} else {
		pr_info("no more MTRRs available\n");
	}
	error = i;
 out:
	mutex_unlock(&mtrr_mutex);
	cpus_read_unlock();
	return error;
}
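
/*
 * Example (sketch, hypothetical addresses): mapping 1 MiB of MMIO at
 * 0xd0000000 as write-combining, expressed in the page units this
 * function expects:
 *
 *	mtrr_add_page(0xd0000000UL >> PAGE_SHIFT, 0x100000UL >> PAGE_SHIFT,
 *		      MTRR_TYPE_WRCOMB, true);
 *
 * Most drivers use the byte-granular mtrr_add() wrapper below instead.
 */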

static int mtrr_check(unsigned long base, unsigned long size)
{
	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
		pr_warn("size and base must be multiples of 4 kiB\n");
		Dprintk("size: 0x%lx base: 0x%lx\n", size, base);
		dump_stack();
		return -1;
	}
	return 0;
}

/**
 * mtrr_add - Add a memory type region
 * @base: Physical base address of region
 * @size: Physical size of region
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non-Intel processors. This function allows drivers to request that an
 * MTRR be added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
	     bool increment)
{
	if (!mtrr_enabled())
		return -ENODEV;
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
			     increment);
}
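
/*
 * Hypothetical driver usage (a sketch only; "fb_base" and "fb_size"
 * stand in for a real device's aperture, which must be a power-of-two
 * size on a matching boundary):
 *
 *	int reg = mtrr_add(fb_base, fb_size, MTRR_TYPE_WRCOMB, true);
 *	if (reg < 0)
 *		dev_warn(dev, "no WC MTRR, framebuffer writes may be slow\n");
 *	...
 *	if (reg >= 0)
 *		mtrr_del(reg, fb_base, fb_size);
 *
 * New code should normally use arch_phys_wc_add()/arch_phys_wc_del()
 * below, which become no-ops when PAT already provides write-combining.
 */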

/**
 * mtrr_del_page - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
 */
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
	int i, max;
	mtrr_type ltype;
	unsigned long lbase, lsize;
	int error = -EINVAL;

	if (!mtrr_enabled())
		return -ENODEV;

	max = num_var_ranges;
	/* No CPU hotplug when we change MTRR entries */
	cpus_read_lock();
	mutex_lock(&mtrr_mutex);
	if (reg < 0) {
		/* Search for existing MTRR */
		for (i = 0; i < max; ++i) {
			mtrr_if->get(i, &lbase, &lsize, &ltype);
			if (lbase == base && lsize == size) {
				reg = i;
				break;
			}
		}
		if (reg < 0) {
			Dprintk("no MTRR for %lx000,%lx000 found\n", base, size);
			goto out;
		}
	}
	if (reg >= max) {
		pr_warn("register: %d too big\n", reg);
		goto out;
	}
	mtrr_if->get(reg, &lbase, &lsize, &ltype);
	if (lsize < 1) {
		pr_warn("MTRR %d not used\n", reg);
		goto out;
	}
	if (mtrr_usage_table[reg] < 1) {
		pr_warn("reg: %d has count=0\n", reg);
		goto out;
	}
	if (--mtrr_usage_table[reg] < 1)
		set_mtrr(reg, 0, 0, 0);
	error = reg;
 out:
	mutex_unlock(&mtrr_mutex);
	cpus_read_unlock();
	return error;
}

/**
 * mtrr_del - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
 */
int mtrr_del(int reg, unsigned long base, unsigned long size)
{
	if (!mtrr_enabled())
		return -ENODEV;
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}

/**
 * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
 * @base: Physical base address
 * @size: Size of region
 *
 * If PAT is available, this does nothing. If PAT is unavailable, it
 * attempts to add a WC MTRR covering size bytes starting at base and
 * logs an error if this fails.
 *
 * The caller should provide a power of two size on an equivalent
 * power of two boundary.
 *
 * Drivers must store the return value to pass to arch_phys_wc_del(),
 * but drivers should not try to interpret that return value.
 */
int arch_phys_wc_add(unsigned long base, unsigned long size)
{
	int ret;

	if (pat_enabled() || !mtrr_enabled())
		return 0;  /* Success! (We don't need to do anything.) */

	ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
	if (ret < 0) {
		pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.\n",
			(void *)base, (void *)(base + size - 1));
		return ret;
	}
	return ret + MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL(arch_phys_wc_add);
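
/*
 * Typical pairing, sketched for a hypothetical fbdev driver (the
 * "wc_cookie" field is illustrative, not an existing API):
 *
 *	par->wc_cookie = arch_phys_wc_add(info->fix.smem_start,
 *					  info->fix.smem_len);
 *	par->screen = ioremap_wc(info->fix.smem_start, info->fix.smem_len);
 *	...
 *	arch_phys_wc_del(par->wc_cookie);
 *
 * The cookie may be passed back unconditionally: both error codes and
 * the 0 returned when PAT is enabled make arch_phys_wc_del() a no-op.
 */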

/*
 * arch_phys_wc_del - undoes arch_phys_wc_add
 * @handle: Return value from arch_phys_wc_add
 *
 * This cleans up after arch_phys_wc_add.
 *
 * The API guarantees that arch_phys_wc_del(error code) and
 * arch_phys_wc_del(0) do nothing.
 */
void arch_phys_wc_del(int handle)
{
	if (handle >= 1) {
		WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
		mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
	}
}
EXPORT_SYMBOL(arch_phys_wc_del);

/*
 * arch_phys_wc_index - translates arch_phys_wc_add's return value
 * @handle: Return value from arch_phys_wc_add
 *
 * This will turn the return value from arch_phys_wc_add into an mtrr
 * index suitable for debugging.
 *
 * Note: There is no legitimate use for this function, except possibly
 * in a printk line. Alas there is an illegitimate use in some ancient
 * drm ioctls.
 */
int arch_phys_wc_index(int handle)
{
	if (handle < MTRR_TO_PHYS_WC_OFFSET)
		return -1;
	else
		return handle - MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL_GPL(arch_phys_wc_index);

int __initdata changed_by_mtrr_cleanup;

/**
 * mtrr_bp_init - initialize MTRRs on the boot CPU
 *
 * This needs to be called early; before any of the other CPUs are
 * initialized (i.e. before smp_init()).
 */
void __init mtrr_bp_init(void)
{
	bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR);
	const char *why = "(not available)";
	unsigned long config, dummy;

	phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32);

	if (!generic_mtrrs && mtrr_state.enabled) {
		/*
		 * Software overwrite of MTRR state, only for generic case.
		 * Note that X86_FEATURE_MTRR has been reset in this case.
		 */
		init_table();
		mtrr_build_map();
		pr_info("MTRRs set to read-only\n");

		return;
	}

	if (generic_mtrrs)
		mtrr_if = &generic_mtrr_ops;
	else
		mtrr_set_if();

	if (mtrr_enabled()) {
		/* Get the number of variable MTRR ranges. */
		if (mtrr_if == &generic_mtrr_ops)
			rdmsr(MSR_MTRRcap, config, dummy);
		else
			config = mtrr_if->var_regs;
		num_var_ranges = config & MTRR_CAP_VCNT;

		init_table();
		if (mtrr_if == &generic_mtrr_ops) {
			/* BIOS may override */
			if (get_mtrr_state()) {
				memory_caching_control |= CACHE_MTRR;
				changed_by_mtrr_cleanup = mtrr_cleanup();
				mtrr_build_map();
			} else {
				mtrr_if = NULL;
				why = "by BIOS";
			}
		}
	}

	if (!mtrr_enabled())
		pr_info("MTRRs disabled %s\n", why);
}

/**
 * mtrr_save_state - Save current fixed-range MTRR state of the first
 * cpu in cpu_online_mask.
 */
void mtrr_save_state(void)
{
	int first_cpu;

	if (!mtrr_enabled() || !mtrr_state.have_fixed)
		return;

	first_cpu = cpumask_first(cpu_online_mask);
	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
}

static int __init mtrr_init_finalize(void)
{
	/*
	 * Map might exist if guest_force_mtrr_state() has been called or if
	 * mtrr_enabled() returns true.
	 */
	mtrr_copy_map();

	if (!mtrr_enabled())
		return 0;

	if (memory_caching_control & CACHE_MTRR) {
		if (!changed_by_mtrr_cleanup)
			mtrr_state_warn();
		return 0;
	}

	mtrr_register_syscore();

	return 0;
}
subsys_initcall(mtrr_init_finalize);