| 1 | // SPDX-License-Identifier: GPL-2.0-only | 
|---|
| 2 | /* | 
|---|
| 3 | * Page Attribute Table (PAT) support: handle memory caching attributes in page tables. | 
|---|
| 4 | * | 
|---|
| 5 | * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | 
|---|
| 6 | *          Suresh B Siddha <suresh.b.siddha@intel.com> | 
|---|
| 7 | * | 
|---|
| 8 | * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. | 
|---|
| 9 | * | 
|---|
| 10 | * Basic principles: | 
|---|
| 11 | * | 
|---|
| 12 | * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and | 
|---|
| 13 | * the kernel to set one of a handful of 'caching type' attributes for physical | 
|---|
| 14 | * memory ranges: uncached, write-combining, write-through, write-protected, | 
|---|
| 15 | * and the most commonly used and default attribute: write-back caching. | 
|---|
| 16 | * | 
|---|
| 17 | * PAT support supersedes and augments MTRR support in a compatible fashion: MTRR is | 
|---|
| 18 | * a hardware interface to enumerate a limited number of physical memory ranges | 
|---|
| 19 | * and set their caching attributes explicitly, programmed into the CPU via MSRs. | 
|---|
| 20 | * Even modern CPUs have MTRRs enabled - but these are typically not touched | 
|---|
| 21 | * by the kernel or by user-space (such as the X server), we rely on PAT for any | 
|---|
| 22 | * additional cache attribute logic. | 
|---|
| 23 | * | 
|---|
| 24 | * PAT doesn't work via explicit memory ranges, but uses page table entries to add | 
|---|
| 25 | * cache attribute information to the mapped memory range: there's 3 bits used, | 
|---|
| 26 | * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the | 
|---|
| 27 | * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT). | 
|---|
| 28 | * | 
|---|
| 29 | * ( There's a metric ton of finer details, such as compatibility with CPU quirks | 
|---|
| 30 | *   that only support 4 types of PAT entries, and interaction with MTRRs, see | 
|---|
| 31 | *   below for details. ) | 
|---|
| 32 | */ | 
|---|
| 33 |  | 
|---|
| 34 | #include <linux/seq_file.h> | 
|---|
| 35 | #include <linux/memblock.h> | 
|---|
| 36 | #include <linux/debugfs.h> | 
|---|
| 37 | #include <linux/ioport.h> | 
|---|
| 38 | #include <linux/kernel.h> | 
|---|
| 39 | #include <linux/slab.h> | 
|---|
| 40 | #include <linux/io.h> | 
|---|
| 41 | #include <linux/mm.h> | 
|---|
| 42 | #include <linux/highmem.h> | 
|---|
| 43 | #include <linux/fs.h> | 
|---|
| 44 | #include <linux/rbtree.h> | 
|---|
| 45 |  | 
|---|
| 46 | #include <asm/cpu_device_id.h> | 
|---|
| 47 | #include <asm/cacheflush.h> | 
|---|
| 48 | #include <asm/cacheinfo.h> | 
|---|
| 49 | #include <asm/processor.h> | 
|---|
| 50 | #include <asm/tlbflush.h> | 
|---|
| 51 | #include <asm/x86_init.h> | 
|---|
| 52 | #include <asm/fcntl.h> | 
|---|
| 53 | #include <asm/e820/api.h> | 
|---|
| 54 | #include <asm/mtrr.h> | 
|---|
| 55 | #include <asm/page.h> | 
|---|
| 56 | #include <asm/msr.h> | 
|---|
| 57 | #include <asm/memtype.h> | 
|---|
| 58 | #include <asm/io.h> | 
|---|
| 59 |  | 
|---|
| 60 | #include "memtype.h" | 
|---|
| 61 | #include "../mm_internal.h" | 
|---|
| 62 |  | 
|---|
| 63 | #undef pr_fmt | 
|---|
| 64 | #define pr_fmt(fmt) "" fmt | 
|---|
| 65 |  | 
|---|
| 66 | static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); | 
|---|
| 67 | static u64 __ro_after_init pat_msr_val; | 
|---|
| 68 |  | 
|---|
| 69 | /* | 
|---|
| 70 | * PAT support is enabled by default, but can be disabled for | 
|---|
| 71 | * various user-requested or hardware-forced reasons: | 
|---|
| 72 | */ | 
|---|
| 73 | static void __init pat_disable(const char *msg_reason) | 
|---|
| 74 | { | 
|---|
| 75 | if (pat_disabled) | 
|---|
| 76 | return; | 
|---|
| 77 |  | 
|---|
| 78 | pat_disabled = true; | 
|---|
| 79 | pr_info( "x86/PAT: %s\n", msg_reason); | 
|---|
| 80 |  | 
|---|
| 81 | memory_caching_control &= ~CACHE_PAT; | 
|---|
| 82 | } | 
|---|
| 83 |  | 
|---|
| 84 | static int __init nopat(char *str) | 
|---|
| 85 | { | 
|---|
| 86 | pat_disable(msg_reason: "PAT support disabled via boot option."); | 
|---|
| 87 | return 0; | 
|---|
| 88 | } | 
|---|
| 89 | early_param( "nopat", nopat); | 
|---|
| 90 |  | 
|---|
| 91 | bool pat_enabled(void) | 
|---|
| 92 | { | 
|---|
| 93 | return !pat_disabled; | 
|---|
| 94 | } | 
|---|
| 95 | EXPORT_SYMBOL_GPL(pat_enabled); | 
|---|
| 96 |  | 
|---|
| 97 | int pat_debug_enable; | 
|---|
| 98 |  | 
|---|
| 99 | static int __init pat_debug_setup(char *str) | 
|---|
| 100 | { | 
|---|
| 101 | pat_debug_enable = 1; | 
|---|
| 102 | return 1; | 
|---|
| 103 | } | 
|---|
| 104 | __setup( "debugpat", pat_debug_setup); | 
|---|
| 105 |  | 
|---|
| 106 | #ifdef CONFIG_X86_PAT | 
|---|
| 107 | /* | 
|---|
| 108 | * X86 PAT uses page flags arch_1 and arch_2 together to keep track of | 
|---|
| 109 | * memory type of pages that have backing page struct. | 
|---|
| 110 | * | 
|---|
| 111 | * X86 PAT supports 4 different memory types: | 
|---|
| 112 | *  - _PAGE_CACHE_MODE_WB | 
|---|
| 113 | *  - _PAGE_CACHE_MODE_WC | 
|---|
| 114 | *  - _PAGE_CACHE_MODE_UC_MINUS | 
|---|
| 115 | *  - _PAGE_CACHE_MODE_WT | 
|---|
| 116 | * | 
|---|
| 117 | * _PAGE_CACHE_MODE_WB is the default type. | 
|---|
| 118 | */ | 
|---|
| 119 |  | 
|---|
| 120 | #define _PGMT_WB		0 | 
|---|
| 121 | #define _PGMT_WC		(1UL << PG_arch_1) | 
|---|
| 122 | #define _PGMT_UC_MINUS		(1UL << PG_arch_2) | 
|---|
| 123 | #define _PGMT_WT		(1UL << PG_arch_2 | 1UL << PG_arch_1) | 
|---|
| 124 | #define _PGMT_MASK		(1UL << PG_arch_2 | 1UL << PG_arch_1) | 
|---|
| 125 | #define _PGMT_CLEAR_MASK	(~_PGMT_MASK) | 
|---|
| 126 |  | 
|---|
| 127 | static inline enum page_cache_mode get_page_memtype(struct page *pg) | 
|---|
| 128 | { | 
|---|
| 129 | unsigned long pg_flags = pg->flags.f & _PGMT_MASK; | 
|---|
| 130 |  | 
|---|
| 131 | if (pg_flags == _PGMT_WB) | 
|---|
| 132 | return _PAGE_CACHE_MODE_WB; | 
|---|
| 133 | else if (pg_flags == _PGMT_WC) | 
|---|
| 134 | return _PAGE_CACHE_MODE_WC; | 
|---|
| 135 | else if (pg_flags == _PGMT_UC_MINUS) | 
|---|
| 136 | return _PAGE_CACHE_MODE_UC_MINUS; | 
|---|
| 137 | else | 
|---|
| 138 | return _PAGE_CACHE_MODE_WT; | 
|---|
| 139 | } | 
|---|
| 140 |  | 
|---|
| 141 | static inline void set_page_memtype(struct page *pg, | 
|---|
| 142 | enum page_cache_mode memtype) | 
|---|
| 143 | { | 
|---|
| 144 | unsigned long memtype_flags; | 
|---|
| 145 | unsigned long old_flags; | 
|---|
| 146 | unsigned long new_flags; | 
|---|
| 147 |  | 
|---|
| 148 | switch (memtype) { | 
|---|
| 149 | case _PAGE_CACHE_MODE_WC: | 
|---|
| 150 | memtype_flags = _PGMT_WC; | 
|---|
| 151 | break; | 
|---|
| 152 | case _PAGE_CACHE_MODE_UC_MINUS: | 
|---|
| 153 | memtype_flags = _PGMT_UC_MINUS; | 
|---|
| 154 | break; | 
|---|
| 155 | case _PAGE_CACHE_MODE_WT: | 
|---|
| 156 | memtype_flags = _PGMT_WT; | 
|---|
| 157 | break; | 
|---|
| 158 | case _PAGE_CACHE_MODE_WB: | 
|---|
| 159 | default: | 
|---|
| 160 | memtype_flags = _PGMT_WB; | 
|---|
| 161 | break; | 
|---|
| 162 | } | 
|---|
| 163 |  | 
|---|
| 164 | old_flags = READ_ONCE(pg->flags.f); | 
|---|
| 165 | do { | 
|---|
| 166 | new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags; | 
|---|
| 167 | } while (!try_cmpxchg(&pg->flags.f, &old_flags, new_flags)); | 
|---|
| 168 | } | 
|---|
| 169 | #else | 
|---|
| 170 | static inline enum page_cache_mode get_page_memtype(struct page *pg) | 
|---|
| 171 | { | 
|---|
| 172 | return -1; | 
|---|
| 173 | } | 
|---|
| 174 | static inline void set_page_memtype(struct page *pg, | 
|---|
| 175 | enum page_cache_mode memtype) | 
|---|
| 176 | { | 
|---|
| 177 | } | 
|---|
| 178 | #endif | 
|---|
| 179 |  | 
|---|
| 180 | #define CM(c) (_PAGE_CACHE_MODE_ ## c) | 
|---|
| 181 |  | 
|---|
| 182 | static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val, | 
|---|
| 183 | char *msg) | 
|---|
| 184 | { | 
|---|
| 185 | enum page_cache_mode cache; | 
|---|
| 186 | char *cache_mode; | 
|---|
| 187 |  | 
|---|
| 188 | switch (pat_val) { | 
|---|
| 189 | case X86_MEMTYPE_UC:       cache = CM(UC);       cache_mode = "UC  "; break; | 
|---|
| 190 | case X86_MEMTYPE_WC:       cache = CM(WC);       cache_mode = "WC  "; break; | 
|---|
| 191 | case X86_MEMTYPE_WT:       cache = CM(WT);       cache_mode = "WT  "; break; | 
|---|
| 192 | case X86_MEMTYPE_WP:       cache = CM(WP);       cache_mode = "WP  "; break; | 
|---|
| 193 | case X86_MEMTYPE_WB:       cache = CM(WB);       cache_mode = "WB  "; break; | 
|---|
| 194 | case X86_MEMTYPE_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break; | 
|---|
| 195 | default:                   cache = CM(WB);       cache_mode = "WB  "; break; | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|
| 198 | memcpy(to: msg, from: cache_mode, len: 4); | 
|---|
| 199 |  | 
|---|
| 200 | return cache; | 
|---|
| 201 | } | 
|---|
| 202 |  | 
|---|
| 203 | #undef CM | 
|---|
| 204 |  | 
|---|
| 205 | /* | 
|---|
| 206 | * Update the cache mode to pgprot translation tables according to PAT | 
|---|
| 207 | * configuration. | 
|---|
| 208 | * Using lower indices is preferred, so we start with highest index. | 
|---|
| 209 | */ | 
|---|
| 210 | static void __init init_cache_modes(u64 pat) | 
|---|
| 211 | { | 
|---|
| 212 | enum page_cache_mode cache; | 
|---|
| 213 | char pat_msg[33]; | 
|---|
| 214 | int i; | 
|---|
| 215 |  | 
|---|
| 216 | pat_msg[32] = 0; | 
|---|
| 217 | for (i = 7; i >= 0; i--) { | 
|---|
| 218 | cache = pat_get_cache_mode(pat_val: (pat >> (i * 8)) & 7, | 
|---|
| 219 | msg: pat_msg + 4 * i); | 
|---|
| 220 | update_cache_mode_entry(entry: i, cache); | 
|---|
| 221 | } | 
|---|
| 222 | pr_info( "x86/PAT: Configuration [0-7]: %s\n", pat_msg); | 
|---|
| 223 | } | 
|---|
| 224 |  | 
|---|
| 225 | void pat_cpu_init(void) | 
|---|
| 226 | { | 
|---|
| 227 | if (!boot_cpu_has(X86_FEATURE_PAT)) { | 
|---|
| 228 | /* | 
|---|
| 229 | * If this happens we are on a secondary CPU, but switched to | 
|---|
| 230 | * PAT on the boot CPU. We have no way to undo PAT. | 
|---|
| 231 | */ | 
|---|
| 232 | panic(fmt: "x86/PAT: PAT enabled, but not supported by secondary CPU\n"); | 
|---|
| 233 | } | 
|---|
| 234 |  | 
|---|
| 235 | wrmsrq(MSR_IA32_CR_PAT, val: pat_msr_val); | 
|---|
| 236 |  | 
|---|
| 237 | __flush_tlb_all(); | 
|---|
| 238 | } | 
|---|
| 239 |  | 
|---|
| 240 | /** | 
|---|
| 241 | * pat_bp_init - Initialize the PAT MSR value and PAT table | 
|---|
| 242 | * | 
|---|
| 243 | * This function initializes PAT MSR value and PAT table with an OS-defined | 
|---|
| 244 | * value to enable additional cache attributes, WC, WT and WP. | 
|---|
| 245 | * | 
|---|
| 246 | * This function prepares the calls of pat_cpu_init() via cache_cpu_init() | 
|---|
| 247 | * on all CPUs. | 
|---|
| 248 | */ | 
|---|
| 249 | void __init pat_bp_init(void) | 
|---|
| 250 | { | 
|---|
| 251 | struct cpuinfo_x86 *c = &boot_cpu_data; | 
|---|
| 252 |  | 
|---|
| 253 | if (!IS_ENABLED(CONFIG_X86_PAT)) | 
|---|
| 254 | pr_info_once( "x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n"); | 
|---|
| 255 |  | 
|---|
| 256 | if (!cpu_feature_enabled(X86_FEATURE_PAT)) | 
|---|
| 257 | pat_disable(msg_reason: "PAT not supported by the CPU."); | 
|---|
| 258 | else | 
|---|
| 259 | rdmsrq(MSR_IA32_CR_PAT, pat_msr_val); | 
|---|
| 260 |  | 
|---|
| 261 | if (!pat_msr_val) { | 
|---|
| 262 | pat_disable(msg_reason: "PAT support disabled by the firmware."); | 
|---|
| 263 |  | 
|---|
| 264 | /* | 
|---|
| 265 | * No PAT. Emulate the PAT table that corresponds to the two | 
|---|
| 266 | * cache bits, PWT (Write Through) and PCD (Cache Disable). | 
|---|
| 267 | * This setup is also the same as the BIOS default setup. | 
|---|
| 268 | * | 
|---|
| 269 | * PTE encoding: | 
|---|
| 270 | * | 
|---|
| 271 | *       PCD | 
|---|
| 272 | *       |PWT  PAT | 
|---|
| 273 | *       ||    slot | 
|---|
| 274 | *       00    0    WB : _PAGE_CACHE_MODE_WB | 
|---|
| 275 | *       01    1    WT : _PAGE_CACHE_MODE_WT | 
|---|
| 276 | *       10    2    UC-: _PAGE_CACHE_MODE_UC_MINUS | 
|---|
| 277 | *       11    3    UC : _PAGE_CACHE_MODE_UC | 
|---|
| 278 | * | 
|---|
| 279 | * NOTE: When WC or WP is used, it is redirected to UC- per | 
|---|
| 280 | * the default setup in __cachemode2pte_tbl[]. | 
|---|
| 281 | */ | 
|---|
| 282 | pat_msr_val = PAT_VALUE(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC); | 
|---|
| 283 | } | 
|---|
| 284 |  | 
|---|
| 285 | /* | 
|---|
| 286 | * Xen PV doesn't allow to set PAT MSR, but all cache modes are | 
|---|
| 287 | * supported. | 
|---|
| 288 | */ | 
|---|
| 289 | if (pat_disabled || cpu_feature_enabled(X86_FEATURE_XENPV)) { | 
|---|
| 290 | init_cache_modes(pat: pat_msr_val); | 
|---|
| 291 | return; | 
|---|
| 292 | } | 
|---|
| 293 |  | 
|---|
| 294 | if ((c->x86_vfm >= INTEL_PENTIUM_PRO   && c->x86_vfm <= INTEL_PENTIUM_M_DOTHAN) || | 
|---|
| 295 | (c->x86_vfm >= INTEL_P4_WILLAMETTE && c->x86_vfm <= INTEL_P4_CEDARMILL)) { | 
|---|
| 296 | /* | 
|---|
| 297 | * PAT support with the lower four entries. Intel Pentium 2, | 
|---|
| 298 | * 3, M, and 4 are affected by PAT errata, which makes the | 
|---|
| 299 | * upper four entries unusable. To be on the safe side, we don't | 
|---|
| 300 | * use those. | 
|---|
| 301 | * | 
|---|
| 302 | *  PTE encoding: | 
|---|
| 303 | *      PAT | 
|---|
| 304 | *      |PCD | 
|---|
| 305 | *      ||PWT  PAT | 
|---|
| 306 | *      |||    slot | 
|---|
| 307 | *      000    0    WB : _PAGE_CACHE_MODE_WB | 
|---|
| 308 | *      001    1    WC : _PAGE_CACHE_MODE_WC | 
|---|
| 309 | *      010    2    UC-: _PAGE_CACHE_MODE_UC_MINUS | 
|---|
| 310 | *      011    3    UC : _PAGE_CACHE_MODE_UC | 
|---|
| 311 | * PAT bit unused | 
|---|
| 312 | * | 
|---|
| 313 | * NOTE: When WT or WP is used, it is redirected to UC- per | 
|---|
| 314 | * the default setup in __cachemode2pte_tbl[]. | 
|---|
| 315 | */ | 
|---|
| 316 | pat_msr_val = PAT_VALUE(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC); | 
|---|
| 317 | } else { | 
|---|
| 318 | /* | 
|---|
| 319 | * Full PAT support.  We put WT in slot 7 to improve | 
|---|
| 320 | * robustness in the presence of errata that might cause | 
|---|
| 321 | * the high PAT bit to be ignored.  This way, a buggy slot 7 | 
|---|
| 322 | * access will hit slot 3, and slot 3 is UC, so at worst | 
|---|
| 323 | * we lose performance without causing a correctness issue. | 
|---|
| 324 | * Pentium 4 erratum N46 is an example for such an erratum, | 
|---|
| 325 | * although we try not to use PAT at all on affected CPUs. | 
|---|
| 326 | * | 
|---|
| 327 | *  PTE encoding: | 
|---|
| 328 | *      PAT | 
|---|
| 329 | *      |PCD | 
|---|
| 330 | *      ||PWT  PAT | 
|---|
| 331 | *      |||    slot | 
|---|
| 332 | *      000    0    WB : _PAGE_CACHE_MODE_WB | 
|---|
| 333 | *      001    1    WC : _PAGE_CACHE_MODE_WC | 
|---|
| 334 | *      010    2    UC-: _PAGE_CACHE_MODE_UC_MINUS | 
|---|
| 335 | *      011    3    UC : _PAGE_CACHE_MODE_UC | 
|---|
| 336 | *      100    4    WB : Reserved | 
|---|
| 337 | *      101    5    WP : _PAGE_CACHE_MODE_WP | 
|---|
| 338 | *      110    6    UC-: Reserved | 
|---|
| 339 | *      111    7    WT : _PAGE_CACHE_MODE_WT | 
|---|
| 340 | * | 
|---|
| 341 | * The reserved slots are unused, but mapped to their | 
|---|
| 342 | * corresponding types in the presence of PAT errata. | 
|---|
| 343 | */ | 
|---|
| 344 | pat_msr_val = PAT_VALUE(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT); | 
|---|
| 345 | } | 
|---|
| 346 |  | 
|---|
| 347 | memory_caching_control |= CACHE_PAT; | 
|---|
| 348 |  | 
|---|
| 349 | init_cache_modes(pat: pat_msr_val); | 
|---|
| 350 | } | 
|---|
| 351 |  | 
|---|
| 352 | static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype accesses */ | 
|---|
| 353 |  | 
|---|
| 354 | /* | 
|---|
| 355 | * Does intersection of PAT memory type and MTRR memory type and returns | 
|---|
| 356 | * the resulting memory type as PAT understands it. | 
|---|
| 357 | * (Type in pat and mtrr will not have same value) | 
|---|
| 358 | * The intersection is based on "Effective Memory Type" tables in IA-32 | 
|---|
| 359 | * SDM vol 3a | 
|---|
| 360 | */ | 
|---|
| 361 | static unsigned long pat_x_mtrr_type(u64 start, u64 end, | 
|---|
| 362 | enum page_cache_mode req_type) | 
|---|
| 363 | { | 
|---|
| 364 | /* | 
|---|
| 365 | * Look for MTRR hint to get the effective type in case where PAT | 
|---|
| 366 | * request is for WB. | 
|---|
| 367 | */ | 
|---|
| 368 | if (req_type == _PAGE_CACHE_MODE_WB) { | 
|---|
| 369 | u8 mtrr_type, uniform; | 
|---|
| 370 |  | 
|---|
| 371 | mtrr_type = mtrr_type_lookup(addr: start, end, uniform: &uniform); | 
|---|
| 372 | if (mtrr_type != MTRR_TYPE_WRBACK) | 
|---|
| 373 | return _PAGE_CACHE_MODE_UC_MINUS; | 
|---|
| 374 |  | 
|---|
| 375 | return _PAGE_CACHE_MODE_WB; | 
|---|
| 376 | } | 
|---|
| 377 |  | 
|---|
| 378 | return req_type; | 
|---|
| 379 | } | 
|---|
| 380 |  | 
|---|
/*
 * Accumulator threaded through the RAM-range walk.
 * @cur_pfn: first PFN not yet accounted for by the walk
 * @ram:     set once a non-empty RAM chunk has been reported
 * @not_ram: set once a gap before a reported chunk has been seen
 */
struct pagerange_state {
	unsigned long		cur_pfn;
	int			ram;
	int			not_ram;
};

/*
 * Walk callback invoked for each RAM chunk found in the inspected range.
 * @initial_pfn:    first PFN of the chunk
 * @total_nr_pages: number of pages in the chunk
 * @arg:            pointer to the struct pagerange_state being updated
 *
 * A chunk starting beyond state->cur_pfn means there was a hole before it.
 * Returns non-zero as soon as both RAM and a hole have been observed,
 * zero otherwise.
 */
static int
pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg)
{
	struct pagerange_state *state = arg;

	state->not_ram	|= initial_pfn > state->cur_pfn;
	state->ram	|= total_nr_pages > 0;
	state->cur_pfn	 = initial_pfn + total_nr_pages;

	return state->ram && state->not_ram;
}
|---|
| 398 |  | 
|---|
| 399 | static int (resource_size_t start, resource_size_t end) | 
|---|
| 400 | { | 
|---|
| 401 | int ret = 0; | 
|---|
| 402 | unsigned long start_pfn = start >> PAGE_SHIFT; | 
|---|
| 403 | unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | 
|---|
| 404 | struct pagerange_state state = {start_pfn, 0, 0}; | 
|---|
| 405 |  | 
|---|
| 406 | /* | 
|---|
| 407 | * For legacy reasons, physical address range in the legacy ISA | 
|---|
| 408 | * region is tracked as non-RAM. This will allow users of | 
|---|
| 409 | * /dev/mem to map portions of legacy ISA region, even when | 
|---|
| 410 | * some of those portions are listed(or not even listed) with | 
|---|
| 411 | * different e820 types(RAM/reserved/..) | 
|---|
| 412 | */ | 
|---|
| 413 | if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT) | 
|---|
| 414 | start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT; | 
|---|
| 415 |  | 
|---|
| 416 | if (start_pfn < end_pfn) { | 
|---|
| 417 | ret = walk_system_ram_range(start_pfn, nr_pages: end_pfn - start_pfn, | 
|---|
| 418 | arg: &state, func: pagerange_is_ram_callback); | 
|---|
| 419 | } | 
|---|
| 420 |  | 
|---|
| 421 | return (ret > 0) ? -1 : (state.ram ? 1 : 0); | 
|---|
| 422 | } | 
|---|
| 423 |  | 
|---|
| 424 | /* | 
|---|
| 425 | * For RAM pages, we use page flags to mark the pages with appropriate type. | 
|---|
| 426 | * The page flags are limited to four types, WB (default), WC, WT and UC-. | 
|---|
| 427 | * WP request fails with -EINVAL, and UC gets redirected to UC-.  Setting | 
|---|
| 428 | * a new memory type is only allowed for a page mapped with the default WB | 
|---|
| 429 | * type. | 
|---|
| 430 | * | 
|---|
| 431 | * Here we do two passes: | 
|---|
| 432 | * - Find the memtype of all the pages in the range, look for any conflicts. | 
|---|
| 433 | * - In case of no conflicts, set the new memtype for pages in the range. | 
|---|
| 434 | */ | 
|---|
| 435 | static int reserve_ram_pages_type(u64 start, u64 end, | 
|---|
| 436 | enum page_cache_mode req_type, | 
|---|
| 437 | enum page_cache_mode *new_type) | 
|---|
| 438 | { | 
|---|
| 439 | struct page *page; | 
|---|
| 440 | u64 pfn; | 
|---|
| 441 |  | 
|---|
| 442 | if (req_type == _PAGE_CACHE_MODE_WP) { | 
|---|
| 443 | if (new_type) | 
|---|
| 444 | *new_type = _PAGE_CACHE_MODE_UC_MINUS; | 
|---|
| 445 | return -EINVAL; | 
|---|
| 446 | } | 
|---|
| 447 |  | 
|---|
| 448 | if (req_type == _PAGE_CACHE_MODE_UC) { | 
|---|
| 449 | /* We do not support strong UC */ | 
|---|
| 450 | WARN_ON_ONCE(1); | 
|---|
| 451 | req_type = _PAGE_CACHE_MODE_UC_MINUS; | 
|---|
| 452 | } | 
|---|
| 453 |  | 
|---|
| 454 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | 
|---|
| 455 | enum page_cache_mode type; | 
|---|
| 456 |  | 
|---|
| 457 | page = pfn_to_page(pfn); | 
|---|
| 458 | type = get_page_memtype(pg: page); | 
|---|
| 459 | if (type != _PAGE_CACHE_MODE_WB) { | 
|---|
| 460 | pr_info( "x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n", | 
|---|
| 461 | start, end - 1, type, req_type); | 
|---|
| 462 | if (new_type) | 
|---|
| 463 | *new_type = type; | 
|---|
| 464 |  | 
|---|
| 465 | return -EBUSY; | 
|---|
| 466 | } | 
|---|
| 467 | } | 
|---|
| 468 |  | 
|---|
| 469 | if (new_type) | 
|---|
| 470 | *new_type = req_type; | 
|---|
| 471 |  | 
|---|
| 472 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | 
|---|
| 473 | page = pfn_to_page(pfn); | 
|---|
| 474 | set_page_memtype(pg: page, memtype: req_type); | 
|---|
| 475 | } | 
|---|
| 476 | return 0; | 
|---|
| 477 | } | 
|---|
| 478 |  | 
|---|
| 479 | static int free_ram_pages_type(u64 start, u64 end) | 
|---|
| 480 | { | 
|---|
| 481 | struct page *page; | 
|---|
| 482 | u64 pfn; | 
|---|
| 483 |  | 
|---|
| 484 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | 
|---|
| 485 | page = pfn_to_page(pfn); | 
|---|
| 486 | set_page_memtype(pg: page, memtype: _PAGE_CACHE_MODE_WB); | 
|---|
| 487 | } | 
|---|
| 488 | return 0; | 
|---|
| 489 | } | 
|---|
| 490 |  | 
|---|
| 491 | static u64 sanitize_phys(u64 address) | 
|---|
| 492 | { | 
|---|
| 493 | /* | 
|---|
| 494 | * When changing the memtype for pages containing poison allow | 
|---|
| 495 | * for a "decoy" virtual address (bit 63 clear) passed to | 
|---|
| 496 | * set_memory_X(). __pa() on a "decoy" address results in a | 
|---|
| 497 | * physical address with bit 63 set. | 
|---|
| 498 | * | 
|---|
| 499 | * Decoy addresses are not present for 32-bit builds, see | 
|---|
| 500 | * set_mce_nospec(). | 
|---|
| 501 | */ | 
|---|
| 502 | if (IS_ENABLED(CONFIG_X86_64)) | 
|---|
| 503 | return address & __PHYSICAL_MASK; | 
|---|
| 504 | return address; | 
|---|
| 505 | } | 
|---|
| 506 |  | 
|---|
| 507 | /* | 
|---|
| 508 | * req_type typically has one of the: | 
|---|
| 509 | * - _PAGE_CACHE_MODE_WB | 
|---|
| 510 | * - _PAGE_CACHE_MODE_WC | 
|---|
| 511 | * - _PAGE_CACHE_MODE_UC_MINUS | 
|---|
| 512 | * - _PAGE_CACHE_MODE_UC | 
|---|
| 513 | * - _PAGE_CACHE_MODE_WT | 
|---|
| 514 | * | 
|---|
| 515 | * If new_type is NULL, function will return an error if it cannot reserve the | 
|---|
| 516 | * region with req_type. If new_type is non-NULL, function will return | 
|---|
| 517 | * available type in new_type in case of no error. In case of any error | 
|---|
| 518 | * it will return a negative return value. | 
|---|
| 519 | */ | 
|---|
| 520 | int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type, | 
|---|
| 521 | enum page_cache_mode *new_type) | 
|---|
| 522 | { | 
|---|
| 523 | struct memtype *entry_new; | 
|---|
| 524 | enum page_cache_mode actual_type; | 
|---|
| 525 | int is_range_ram; | 
|---|
| 526 | int err = 0; | 
|---|
| 527 |  | 
|---|
| 528 | start = sanitize_phys(address: start); | 
|---|
| 529 |  | 
|---|
| 530 | /* | 
|---|
| 531 | * The end address passed into this function is exclusive, but | 
|---|
| 532 | * sanitize_phys() expects an inclusive address. | 
|---|
| 533 | */ | 
|---|
| 534 | end = sanitize_phys(address: end - 1) + 1; | 
|---|
| 535 | if (start >= end) { | 
|---|
| 536 | WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__, | 
|---|
| 537 | start, end - 1, cattr_name(req_type)); | 
|---|
| 538 | return -EINVAL; | 
|---|
| 539 | } | 
|---|
| 540 |  | 
|---|
| 541 | if (!pat_enabled()) { | 
|---|
| 542 | /* This is identical to page table setting without PAT */ | 
|---|
| 543 | if (new_type) | 
|---|
| 544 | *new_type = req_type; | 
|---|
| 545 | return 0; | 
|---|
| 546 | } | 
|---|
| 547 |  | 
|---|
| 548 | /* Low ISA region is always mapped WB in page table. No need to track */ | 
|---|
| 549 | if (x86_platform.is_untracked_pat_range(start, end)) { | 
|---|
| 550 | if (new_type) | 
|---|
| 551 | *new_type = _PAGE_CACHE_MODE_WB; | 
|---|
| 552 | return 0; | 
|---|
| 553 | } | 
|---|
| 554 |  | 
|---|
| 555 | /* | 
|---|
| 556 | * Call mtrr_lookup to get the type hint. This is an | 
|---|
| 557 | * optimization for /dev/mem mmap'ers into WB memory (BIOS | 
|---|
| 558 | * tools and ACPI tools). Use WB request for WB memory and use | 
|---|
| 559 | * UC_MINUS otherwise. | 
|---|
| 560 | */ | 
|---|
| 561 | actual_type = pat_x_mtrr_type(start, end, req_type); | 
|---|
| 562 |  | 
|---|
| 563 | if (new_type) | 
|---|
| 564 | *new_type = actual_type; | 
|---|
| 565 |  | 
|---|
| 566 | is_range_ram = pat_pagerange_is_ram(start, end); | 
|---|
| 567 | if (is_range_ram == 1) { | 
|---|
| 568 |  | 
|---|
| 569 | err = reserve_ram_pages_type(start, end, req_type, new_type); | 
|---|
| 570 |  | 
|---|
| 571 | return err; | 
|---|
| 572 | } else if (is_range_ram < 0) { | 
|---|
| 573 | return -EINVAL; | 
|---|
| 574 | } | 
|---|
| 575 |  | 
|---|
| 576 | entry_new = kzalloc(sizeof(struct memtype), GFP_KERNEL); | 
|---|
| 577 | if (!entry_new) | 
|---|
| 578 | return -ENOMEM; | 
|---|
| 579 |  | 
|---|
| 580 | entry_new->start = start; | 
|---|
| 581 | entry_new->end	 = end; | 
|---|
| 582 | entry_new->type	 = actual_type; | 
|---|
| 583 |  | 
|---|
| 584 | spin_lock(lock: &memtype_lock); | 
|---|
| 585 |  | 
|---|
| 586 | err = memtype_check_insert(entry_new, new_type); | 
|---|
| 587 | if (err) { | 
|---|
| 588 | pr_info( "x86/PAT: memtype_reserve failed [mem %#010Lx-%#010Lx], track %s, req %s\n", | 
|---|
| 589 | start, end - 1, | 
|---|
| 590 | cattr_name(entry_new->type), cattr_name(req_type)); | 
|---|
| 591 | kfree(objp: entry_new); | 
|---|
| 592 | spin_unlock(lock: &memtype_lock); | 
|---|
| 593 |  | 
|---|
| 594 | return err; | 
|---|
| 595 | } | 
|---|
| 596 |  | 
|---|
| 597 | spin_unlock(lock: &memtype_lock); | 
|---|
| 598 |  | 
|---|
| 599 | dprintk( "memtype_reserve added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", | 
|---|
| 600 | start, end - 1, cattr_name(entry_new->type), cattr_name(req_type), | 
|---|
| 601 | new_type ? cattr_name(*new_type) : "-"); | 
|---|
| 602 |  | 
|---|
| 603 | return err; | 
|---|
| 604 | } | 
|---|
| 605 |  | 
|---|
| 606 | int memtype_free(u64 start, u64 end) | 
|---|
| 607 | { | 
|---|
| 608 | int is_range_ram; | 
|---|
| 609 | struct memtype *entry_old; | 
|---|
| 610 |  | 
|---|
| 611 | if (!pat_enabled()) | 
|---|
| 612 | return 0; | 
|---|
| 613 |  | 
|---|
| 614 | start = sanitize_phys(address: start); | 
|---|
| 615 | end = sanitize_phys(address: end); | 
|---|
| 616 |  | 
|---|
| 617 | /* Low ISA region is always mapped WB. No need to track */ | 
|---|
| 618 | if (x86_platform.is_untracked_pat_range(start, end)) | 
|---|
| 619 | return 0; | 
|---|
| 620 |  | 
|---|
| 621 | is_range_ram = pat_pagerange_is_ram(start, end); | 
|---|
| 622 | if (is_range_ram == 1) | 
|---|
| 623 | return free_ram_pages_type(start, end); | 
|---|
| 624 | if (is_range_ram < 0) | 
|---|
| 625 | return -EINVAL; | 
|---|
| 626 |  | 
|---|
| 627 | spin_lock(lock: &memtype_lock); | 
|---|
| 628 | entry_old = memtype_erase(start, end); | 
|---|
| 629 | spin_unlock(lock: &memtype_lock); | 
|---|
| 630 |  | 
|---|
| 631 | if (IS_ERR(ptr: entry_old)) { | 
|---|
| 632 | pr_info( "x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", | 
|---|
| 633 | current->comm, current->pid, start, end - 1); | 
|---|
| 634 | return -EINVAL; | 
|---|
| 635 | } | 
|---|
| 636 |  | 
|---|
| 637 | kfree(objp: entry_old); | 
|---|
| 638 |  | 
|---|
| 639 | dprintk( "memtype_free request [mem %#010Lx-%#010Lx]\n", start, end - 1); | 
|---|
| 640 |  | 
|---|
| 641 | return 0; | 
|---|
| 642 | } | 
|---|
| 643 |  | 
|---|
| 644 |  | 
|---|
| 645 | /** | 
|---|
| 646 | * lookup_memtype - Looks up the memory type for a physical address | 
|---|
| 647 | * @paddr: physical address of which memory type needs to be looked up | 
|---|
| 648 | * | 
|---|
| 649 | * Only to be called when PAT is enabled | 
|---|
| 650 | * | 
|---|
| 651 | * Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS | 
|---|
| 652 | * or _PAGE_CACHE_MODE_WT. | 
|---|
| 653 | */ | 
|---|
| 654 | static enum page_cache_mode lookup_memtype(u64 paddr) | 
|---|
| 655 | { | 
|---|
| 656 | enum page_cache_mode rettype = _PAGE_CACHE_MODE_WB; | 
|---|
| 657 | struct memtype *entry; | 
|---|
| 658 |  | 
|---|
| 659 | if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE)) | 
|---|
| 660 | return rettype; | 
|---|
| 661 |  | 
|---|
| 662 | if (pat_pagerange_is_ram(start: paddr, end: paddr + PAGE_SIZE)) { | 
|---|
| 663 | struct page *page; | 
|---|
| 664 |  | 
|---|
| 665 | page = pfn_to_page(paddr >> PAGE_SHIFT); | 
|---|
| 666 | return get_page_memtype(pg: page); | 
|---|
| 667 | } | 
|---|
| 668 |  | 
|---|
| 669 | spin_lock(lock: &memtype_lock); | 
|---|
| 670 |  | 
|---|
| 671 | entry = memtype_lookup(addr: paddr); | 
|---|
| 672 | if (entry != NULL) | 
|---|
| 673 | rettype = entry->type; | 
|---|
| 674 | else | 
|---|
| 675 | rettype = _PAGE_CACHE_MODE_UC_MINUS; | 
|---|
| 676 |  | 
|---|
| 677 | spin_unlock(lock: &memtype_lock); | 
|---|
| 678 |  | 
|---|
| 679 | return rettype; | 
|---|
| 680 | } | 
|---|
| 681 |  | 
|---|
| 682 | /** | 
|---|
| 683 | * pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type | 
|---|
| 684 | * of @pfn cannot be overridden by UC MTRR memory type. | 
|---|
| 685 | * @pfn: The page frame number to check. | 
|---|
| 686 | * | 
|---|
| 687 | * Only to be called when PAT is enabled. | 
|---|
| 688 | * | 
|---|
| 689 | * Returns true, if the PAT memory type of @pfn is UC, UC-, or WC. | 
|---|
| 690 | * Returns false in other cases. | 
|---|
| 691 | */ | 
|---|
| 692 | bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn) | 
|---|
| 693 | { | 
|---|
| 694 | enum page_cache_mode cm = lookup_memtype(PFN_PHYS(pfn)); | 
|---|
| 695 |  | 
|---|
| 696 | return cm == _PAGE_CACHE_MODE_UC || | 
|---|
| 697 | cm == _PAGE_CACHE_MODE_UC_MINUS || | 
|---|
| 698 | cm == _PAGE_CACHE_MODE_WC; | 
|---|
| 699 | } | 
|---|
| 700 | EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); | 
|---|
| 701 |  | 
|---|
| 702 | /** | 
|---|
| 703 | * memtype_reserve_io - Request a memory type mapping for a region of memory | 
|---|
| 704 | * @start: start (physical address) of the region | 
|---|
| 705 | * @end: end (physical address) of the region | 
|---|
| 706 | * @type: A pointer to memtype, with requested type. On success, requested | 
|---|
| 707 | * or any other compatible type that was available for the region is returned | 
|---|
| 708 | * | 
|---|
| 709 | * On success, returns 0 | 
|---|
| 710 | * On failure, returns non-zero | 
|---|
| 711 | */ | 
|---|
| 712 | int memtype_reserve_io(resource_size_t start, resource_size_t end, | 
|---|
| 713 | enum page_cache_mode *type) | 
|---|
| 714 | { | 
|---|
| 715 | resource_size_t size = end - start; | 
|---|
| 716 | enum page_cache_mode req_type = *type; | 
|---|
| 717 | enum page_cache_mode new_type; | 
|---|
| 718 | int ret; | 
|---|
| 719 |  | 
|---|
| 720 | WARN_ON_ONCE(iomem_map_sanity_check(start, size)); | 
|---|
| 721 |  | 
|---|
| 722 | ret = memtype_reserve(start, end, req_type, new_type: &new_type); | 
|---|
| 723 | if (ret) | 
|---|
| 724 | goto out_err; | 
|---|
| 725 |  | 
|---|
| 726 | if (!is_new_memtype_allowed(paddr: start, size, pcm: req_type, new_pcm: new_type)) | 
|---|
| 727 | goto out_free; | 
|---|
| 728 |  | 
|---|
| 729 | if (memtype_kernel_map_sync(base: start, size, pcm: new_type) < 0) | 
|---|
| 730 | goto out_free; | 
|---|
| 731 |  | 
|---|
| 732 | *type = new_type; | 
|---|
| 733 | return 0; | 
|---|
| 734 |  | 
|---|
| 735 | out_free: | 
|---|
| 736 | memtype_free(start, end); | 
|---|
| 737 | ret = -EBUSY; | 
|---|
| 738 | out_err: | 
|---|
| 739 | return ret; | 
|---|
| 740 | } | 
|---|
| 741 |  | 
|---|
| 742 | /** | 
|---|
| 743 | * memtype_free_io - Release a memory type mapping for a region of memory | 
|---|
| 744 | * @start: start (physical address) of the region | 
|---|
| 745 | * @end: end (physical address) of the region | 
|---|
| 746 | */ | 
|---|
| 747 | void memtype_free_io(resource_size_t start, resource_size_t end) | 
|---|
| 748 | { | 
|---|
| 749 | memtype_free(start, end); | 
|---|
| 750 | } | 
|---|
| 751 |  | 
|---|
| 752 | #ifdef CONFIG_X86_PAT | 
|---|
| 753 | int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) | 
|---|
| 754 | { | 
|---|
| 755 | enum page_cache_mode type = _PAGE_CACHE_MODE_WC; | 
|---|
| 756 |  | 
|---|
| 757 | return memtype_reserve_io(start, end: start + size, type: &type); | 
|---|
| 758 | } | 
|---|
| 759 | EXPORT_SYMBOL(arch_io_reserve_memtype_wc); | 
|---|
| 760 |  | 
|---|
| 761 | void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) | 
|---|
| 762 | { | 
|---|
| 763 | memtype_free_io(start, end: start + size); | 
|---|
| 764 | } | 
|---|
| 765 | EXPORT_SYMBOL(arch_io_free_memtype_wc); | 
|---|
| 766 | #endif | 
|---|
| 767 |  | 
|---|
| 768 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | 
|---|
| 769 | unsigned long size, pgprot_t vma_prot) | 
|---|
| 770 | { | 
|---|
| 771 | if (!phys_mem_access_encrypted(phys_addr: pfn << PAGE_SHIFT, size)) | 
|---|
| 772 | vma_prot = pgprot_decrypted(vma_prot); | 
|---|
| 773 |  | 
|---|
| 774 | return vma_prot; | 
|---|
| 775 | } | 
|---|
| 776 |  | 
|---|
| 777 | static inline void pgprot_set_cachemode(pgprot_t *prot, enum page_cache_mode pcm) | 
|---|
| 778 | { | 
|---|
| 779 | *prot = __pgprot((pgprot_val(*prot) & ~_PAGE_CACHE_MASK) | | 
|---|
| 780 | cachemode2protval(pcm)); | 
|---|
| 781 | } | 
|---|
| 782 |  | 
|---|
| 783 | int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | 
|---|
| 784 | unsigned long size, pgprot_t *vma_prot) | 
|---|
| 785 | { | 
|---|
| 786 | enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB; | 
|---|
| 787 |  | 
|---|
| 788 | if (!pat_enabled()) | 
|---|
| 789 | return 1; | 
|---|
| 790 |  | 
|---|
| 791 | if (!range_is_allowed(pfn, size)) | 
|---|
| 792 | return 0; | 
|---|
| 793 |  | 
|---|
| 794 | if (file->f_flags & O_DSYNC) | 
|---|
| 795 | pcm = _PAGE_CACHE_MODE_UC_MINUS; | 
|---|
| 796 |  | 
|---|
| 797 | pgprot_set_cachemode(prot: vma_prot, pcm); | 
|---|
| 798 | return 1; | 
|---|
| 799 | } | 
|---|
| 800 |  | 
|---|
| 801 | /* | 
|---|
| 802 | * Change the memory type for the physical address range in kernel identity | 
|---|
| 803 | * mapping space if that range is a part of identity map. | 
|---|
| 804 | */ | 
|---|
| 805 | int memtype_kernel_map_sync(u64 base, unsigned long size, | 
|---|
| 806 | enum page_cache_mode pcm) | 
|---|
| 807 | { | 
|---|
| 808 | unsigned long id_sz; | 
|---|
| 809 |  | 
|---|
| 810 | if (base > __pa(high_memory-1)) | 
|---|
| 811 | return 0; | 
|---|
| 812 |  | 
|---|
| 813 | /* | 
|---|
| 814 | * Some areas in the middle of the kernel identity range | 
|---|
| 815 | * are not mapped, for example the PCI space. | 
|---|
| 816 | */ | 
|---|
| 817 | if (!page_is_ram(pfn: base >> PAGE_SHIFT)) | 
|---|
| 818 | return 0; | 
|---|
| 819 |  | 
|---|
| 820 | id_sz = (__pa(high_memory-1) <= base + size) ? | 
|---|
| 821 | __pa(high_memory) - base : size; | 
|---|
| 822 |  | 
|---|
| 823 | if (ioremap_change_attr(vaddr: (unsigned long)__va(base), size: id_sz, pcm) < 0) { | 
|---|
| 824 | pr_info( "x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n", | 
|---|
| 825 | current->comm, current->pid, | 
|---|
| 826 | cattr_name(pcm), | 
|---|
| 827 | base, (unsigned long long)(base + size-1)); | 
|---|
| 828 | return -EINVAL; | 
|---|
| 829 | } | 
|---|
| 830 | return 0; | 
|---|
| 831 | } | 
|---|
| 832 |  | 
|---|
| 833 | /* | 
|---|
| 834 | * Internal interface to reserve a range of physical memory with prot. | 
|---|
| 835 | * Reserved non RAM regions only and after successful memtype_reserve, | 
|---|
| 836 | * this func also keeps identity mapping (if any) in sync with this new prot. | 
|---|
| 837 | */ | 
|---|
| 838 | static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot) | 
|---|
| 839 | { | 
|---|
| 840 | int is_ram = 0; | 
|---|
| 841 | int ret; | 
|---|
| 842 | enum page_cache_mode want_pcm = pgprot2cachemode(pgprot: *vma_prot); | 
|---|
| 843 | enum page_cache_mode pcm = want_pcm; | 
|---|
| 844 |  | 
|---|
| 845 | is_ram = pat_pagerange_is_ram(start: paddr, end: paddr + size); | 
|---|
| 846 |  | 
|---|
| 847 | /* | 
|---|
| 848 | * reserve_pfn_range() for RAM pages. We do not refcount to keep | 
|---|
| 849 | * track of number of mappings of RAM pages. We can assert that | 
|---|
| 850 | * the type requested matches the type of first page in the range. | 
|---|
| 851 | */ | 
|---|
| 852 | if (is_ram) { | 
|---|
| 853 | if (!pat_enabled()) | 
|---|
| 854 | return 0; | 
|---|
| 855 |  | 
|---|
| 856 | pcm = lookup_memtype(paddr); | 
|---|
| 857 | if (want_pcm != pcm) { | 
|---|
| 858 | pr_warn( "x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n", | 
|---|
| 859 | current->comm, current->pid, | 
|---|
| 860 | cattr_name(want_pcm), | 
|---|
| 861 | (unsigned long long)paddr, | 
|---|
| 862 | (unsigned long long)(paddr + size - 1), | 
|---|
| 863 | cattr_name(pcm)); | 
|---|
| 864 | pgprot_set_cachemode(prot: vma_prot, pcm); | 
|---|
| 865 | } | 
|---|
| 866 | return 0; | 
|---|
| 867 | } | 
|---|
| 868 |  | 
|---|
| 869 | ret = memtype_reserve(start: paddr, end: paddr + size, req_type: want_pcm, new_type: &pcm); | 
|---|
| 870 | if (ret) | 
|---|
| 871 | return ret; | 
|---|
| 872 |  | 
|---|
| 873 | if (pcm != want_pcm) { | 
|---|
| 874 | if (!is_new_memtype_allowed(paddr, size, pcm: want_pcm, new_pcm: pcm)) { | 
|---|
| 875 | memtype_free(start: paddr, end: paddr + size); | 
|---|
| 876 | pr_err( "x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n", | 
|---|
| 877 | current->comm, current->pid, | 
|---|
| 878 | cattr_name(want_pcm), | 
|---|
| 879 | (unsigned long long)paddr, | 
|---|
| 880 | (unsigned long long)(paddr + size - 1), | 
|---|
| 881 | cattr_name(pcm)); | 
|---|
| 882 | return -EINVAL; | 
|---|
| 883 | } | 
|---|
| 884 | pgprot_set_cachemode(prot: vma_prot, pcm); | 
|---|
| 885 | } | 
|---|
| 886 |  | 
|---|
| 887 | if (memtype_kernel_map_sync(base: paddr, size, pcm) < 0) { | 
|---|
| 888 | memtype_free(start: paddr, end: paddr + size); | 
|---|
| 889 | return -EINVAL; | 
|---|
| 890 | } | 
|---|
| 891 | return 0; | 
|---|
| 892 | } | 
|---|
| 893 |  | 
|---|
| 894 | /* | 
|---|
| 895 | * Internal interface to free a range of physical memory. | 
|---|
| 896 | * Frees non RAM regions only. | 
|---|
| 897 | */ | 
|---|
| 898 | static void free_pfn_range(u64 paddr, unsigned long size) | 
|---|
| 899 | { | 
|---|
| 900 | int is_ram; | 
|---|
| 901 |  | 
|---|
| 902 | is_ram = pat_pagerange_is_ram(start: paddr, end: paddr + size); | 
|---|
| 903 | if (is_ram == 0) | 
|---|
| 904 | memtype_free(start: paddr, end: paddr + size); | 
|---|
| 905 | } | 
|---|
| 906 |  | 
|---|
| 907 | int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, pgprot_t *prot) | 
|---|
| 908 | { | 
|---|
| 909 | resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; | 
|---|
| 910 | enum page_cache_mode pcm; | 
|---|
| 911 |  | 
|---|
| 912 | if (!pat_enabled()) | 
|---|
| 913 | return 0; | 
|---|
| 914 |  | 
|---|
| 915 | pcm = lookup_memtype(paddr); | 
|---|
| 916 |  | 
|---|
| 917 | /* Check memtype for the remaining pages */ | 
|---|
| 918 | while (size > PAGE_SIZE) { | 
|---|
| 919 | size -= PAGE_SIZE; | 
|---|
| 920 | paddr += PAGE_SIZE; | 
|---|
| 921 | if (pcm != lookup_memtype(paddr)) | 
|---|
| 922 | return -EINVAL; | 
|---|
| 923 | } | 
|---|
| 924 |  | 
|---|
| 925 | pgprot_set_cachemode(prot, pcm); | 
|---|
| 926 | return 0; | 
|---|
| 927 | } | 
|---|
| 928 |  | 
|---|
| 929 | int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot) | 
|---|
| 930 | { | 
|---|
| 931 | const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; | 
|---|
| 932 |  | 
|---|
| 933 | return reserve_pfn_range(paddr, size, vma_prot: prot); | 
|---|
| 934 | } | 
|---|
| 935 |  | 
|---|
| 936 | void pfnmap_untrack(unsigned long pfn, unsigned long size) | 
|---|
| 937 | { | 
|---|
| 938 | const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; | 
|---|
| 939 |  | 
|---|
| 940 | free_pfn_range(paddr, size); | 
|---|
| 941 | } | 
|---|
| 942 |  | 
|---|
| 943 | pgprot_t pgprot_writecombine(pgprot_t prot) | 
|---|
| 944 | { | 
|---|
| 945 | pgprot_set_cachemode(prot: &prot, pcm: _PAGE_CACHE_MODE_WC); | 
|---|
| 946 | return prot; | 
|---|
| 947 | } | 
|---|
| 948 | EXPORT_SYMBOL_GPL(pgprot_writecombine); | 
|---|
| 949 |  | 
|---|
| 950 | pgprot_t pgprot_writethrough(pgprot_t prot) | 
|---|
| 951 | { | 
|---|
| 952 | pgprot_set_cachemode(prot: &prot, pcm: _PAGE_CACHE_MODE_WT); | 
|---|
| 953 | return prot; | 
|---|
| 954 | } | 
|---|
| 955 | EXPORT_SYMBOL_GPL(pgprot_writethrough); | 
|---|
| 956 |  | 
|---|
| 957 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) | 
|---|
| 958 |  | 
|---|
| 959 | /* | 
|---|
| 960 | * We are allocating a temporary printout-entry to be passed | 
|---|
| 961 | * between seq_start()/next() and seq_show(): | 
|---|
| 962 | */ | 
|---|
| 963 | static struct memtype *memtype_get_idx(loff_t pos) | 
|---|
| 964 | { | 
|---|
| 965 | struct memtype *entry_print; | 
|---|
| 966 | int ret; | 
|---|
| 967 |  | 
|---|
| 968 | entry_print  = kzalloc(sizeof(struct memtype), GFP_KERNEL); | 
|---|
| 969 | if (!entry_print) | 
|---|
| 970 | return NULL; | 
|---|
| 971 |  | 
|---|
| 972 | spin_lock(lock: &memtype_lock); | 
|---|
| 973 | ret = memtype_copy_nth_element(entry_out: entry_print, pos); | 
|---|
| 974 | spin_unlock(lock: &memtype_lock); | 
|---|
| 975 |  | 
|---|
| 976 | /* Free it on error: */ | 
|---|
| 977 | if (ret) { | 
|---|
| 978 | kfree(objp: entry_print); | 
|---|
| 979 | return NULL; | 
|---|
| 980 | } | 
|---|
| 981 |  | 
|---|
| 982 | return entry_print; | 
|---|
| 983 | } | 
|---|
| 984 |  | 
|---|
| 985 | static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) | 
|---|
| 986 | { | 
|---|
| 987 | if (*pos == 0) { | 
|---|
| 988 | ++*pos; | 
|---|
| 989 | seq_puts(m: seq, s: "PAT memtype list:\n"); | 
|---|
| 990 | } | 
|---|
| 991 |  | 
|---|
| 992 | return memtype_get_idx(pos: *pos); | 
|---|
| 993 | } | 
|---|
| 994 |  | 
|---|
| 995 | static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 
|---|
| 996 | { | 
|---|
| 997 | kfree(objp: v); | 
|---|
| 998 | ++*pos; | 
|---|
| 999 | return memtype_get_idx(pos: *pos); | 
|---|
| 1000 | } | 
|---|
| 1001 |  | 
|---|
/* seq_file ->stop callback: release the entry that is still pending, if any. */
static void memtype_seq_stop(struct seq_file *seq, void *v)
{
	kfree(v);
}
|---|
| 1006 |  | 
|---|
| 1007 | static int memtype_seq_show(struct seq_file *seq, void *v) | 
|---|
| 1008 | { | 
|---|
| 1009 | struct memtype *entry_print = (struct memtype *)v; | 
|---|
| 1010 |  | 
|---|
| 1011 | seq_printf(m: seq, fmt: "PAT: [mem 0x%016Lx-0x%016Lx] %s\n", | 
|---|
| 1012 | entry_print->start, | 
|---|
| 1013 | entry_print->end, | 
|---|
| 1014 | cattr_name(pcm: entry_print->type)); | 
|---|
| 1015 |  | 
|---|
| 1016 | return 0; | 
|---|
| 1017 | } | 
|---|
| 1018 |  | 
|---|
| 1019 | static const struct seq_operations memtype_seq_ops = { | 
|---|
| 1020 | .start = memtype_seq_start, | 
|---|
| 1021 | .next  = memtype_seq_next, | 
|---|
| 1022 | .stop  = memtype_seq_stop, | 
|---|
| 1023 | .show  = memtype_seq_show, | 
|---|
| 1024 | }; | 
|---|
| 1025 |  | 
|---|
| 1026 | static int memtype_seq_open(struct inode *inode, struct file *file) | 
|---|
| 1027 | { | 
|---|
| 1028 | return seq_open(file, &memtype_seq_ops); | 
|---|
| 1029 | } | 
|---|
| 1030 |  | 
|---|
| 1031 | static const struct file_operations memtype_fops = { | 
|---|
| 1032 | .open    = memtype_seq_open, | 
|---|
| 1033 | .read    = seq_read, | 
|---|
| 1034 | .llseek  = seq_lseek, | 
|---|
| 1035 | .release = seq_release, | 
|---|
| 1036 | }; | 
|---|
| 1037 |  | 
|---|
| 1038 | static int __init pat_memtype_list_init(void) | 
|---|
| 1039 | { | 
|---|
| 1040 | if (pat_enabled()) { | 
|---|
| 1041 | debugfs_create_file( "pat_memtype_list", S_IRUSR, | 
|---|
| 1042 | arch_debugfs_dir, NULL, &memtype_fops); | 
|---|
| 1043 | } | 
|---|
| 1044 | return 0; | 
|---|
| 1045 | } | 
|---|
| 1046 | late_initcall(pat_memtype_list_init); | 
|---|
| 1047 |  | 
|---|
| 1048 | #endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */ | 
|---|
| 1049 |  | 
|---|