// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002 Andi Kleen
 *
 * This handles calls from both 32bit and 64bit mode.
 *
 * Lock order:
 *	context.ldt_usr_sem
 *	  mmap_lock
 *	    context.lock
 */

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>

#include <asm/ldt.h>
#include <asm/tlb.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/pgtable_areas.h>

#include <xen/xen.h>

/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)

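/*
 * PTI aliases the LDT at a fixed kernel address; two slots are reserved so
 * a new LDT can be mapped while the old one is still in use.  ldt_slot_va()
 * returns the alias address for slot 0 or 1.
 */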
static inline void *ldt_slot_va(int slot)
{
	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
}

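/*
 * Install @mm's LDT on the calling CPU, or clear the LDT register if the
 * mm has no LDT.
 */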
void load_mm_ldt(struct mm_struct *mm)
{
	struct ldt_struct *ldt;

	/* READ_ONCE synchronizes with smp_store_release */
	ldt = READ_ONCE(mm->context.ldt);

	/*
	 * Any change to mm->context.ldt is followed by an IPI to all
	 * CPUs with the mm active.  The LDT will not be freed until
	 * after the IPI is handled by all such CPUs.  This means that
	 * if the ldt_struct changes before we return, the values we see
	 * will be safe, and the new values will be loaded before we run
	 * any user code.
	 *
	 * NB: don't try to convert this to use RCU without extreme care.
	 * We would still need IRQs off, because we don't want to change
	 * the local LDT after an IPI loaded a newer value than the one
	 * that we can see.
	 */

	if (unlikely(ldt)) {
		if (static_cpu_has(X86_FEATURE_PTI)) {
			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
				/*
				 * Whoops -- either the new LDT isn't mapped
				 * (if slot == -1) or is mapped into a bogus
				 * slot (if slot > 1).
				 */
				clear_LDT();
				return;
			}

			/*
			 * If page table isolation is enabled, ldt->entries
			 * will not be mapped in the userspace pagetables.
			 * Tell the CPU to access the LDT through the alias
			 * at ldt_slot_va(ldt->slot).
			 */
			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
		} else {
			set_ldt(ldt->entries, ldt->nr_entries);
		}
	} else {
		clear_LDT();
	}
}

void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
	/*
	 * Load the LDT if either the old or new mm had an LDT.
	 *
	 * An mm will never go from having an LDT to not having an LDT.  Two
	 * mms never share an LDT, so we don't gain anything by checking to
	 * see whether the LDT changed.  There's also no guarantee that
	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
	 * then prev->context.ldt will also be non-NULL.
	 *
	 * If we really cared, we could optimize the case where prev == next
	 * and we're exiting lazy mode.  Most of the time, if this happens,
	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
	 * used by legacy code and emulators where we don't need this level of
	 * performance.
	 *
	 * This uses | instead of || because it generates better code.
	 */
	if (unlikely((unsigned long)prev->context.ldt |
		     (unsigned long)next->context.ldt))
		load_mm_ldt(next);

	DEBUG_LOCKS_WARN_ON(preemptible());
}

static void refresh_ldt_segments(void)
{
#ifdef CONFIG_X86_64
	unsigned short sel;

	/*
	 * Make sure that the cached DS and ES descriptors match the updated
	 * LDT.
	 */
	savesegment(ds, sel);
	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(ds, sel);

	savesegment(es, sel);
	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(es, sel);
#endif
}

/* context.lock is held by the task which issued the smp function call */
static void flush_ldt(void *__mm)
{
	struct mm_struct *mm = __mm;

	if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
		return;

	load_mm_ldt(mm);

	refresh_ldt_segments();
}

/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
{
	struct ldt_struct *new_ldt;
	unsigned int alloc_size;

	if (num_entries > LDT_ENTRIES)
		return NULL;

	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL_ACCOUNT);
	if (!new_ldt)
		return NULL;

	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
	alloc_size = num_entries * LDT_ENTRY_SIZE;

	/*
	 * Xen is very picky: it requires a page-aligned LDT that has no
	 * trailing nonzero bytes in any page that contains LDT descriptors.
	 * Keep it simple: zero the whole allocation and never allocate less
	 * than PAGE_SIZE.
	 */
	if (alloc_size > PAGE_SIZE)
		new_ldt->entries = __vmalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	else
		new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);

	if (!new_ldt->entries) {
		kfree(new_ldt);
		return NULL;
	}

	/* The new LDT isn't aliased for PTI yet. */
	new_ldt->slot = -1;

	new_ldt->nr_entries = num_entries;
	return new_ldt;
}

#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION

static void do_sanity_check(struct mm_struct *mm,
			    bool had_kernel_mapping,
			    bool had_user_mapping)
{
	if (mm->context.ldt) {
		/*
		 * We already had an LDT.  The top-level entry should already
		 * have been allocated and synchronized with the usermode
		 * tables.
		 */
		WARN_ON(!had_kernel_mapping);
		if (boot_cpu_has(X86_FEATURE_PTI))
			WARN_ON(!had_user_mapping);
	} else {
		/*
		 * This is the first time we're mapping an LDT for this process.
		 * Sync the pgd to the usermode tables.
		 */
		WARN_ON(had_kernel_mapping);
		if (boot_cpu_has(X86_FEATURE_PTI))
			WARN_ON(had_user_mapping);
	}
}

#ifdef CONFIG_X86_PAE

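/*
 * With PAE, PTI maintains the user/kernel page-table split at the PMD level
 * rather than at the PGD level, so the LDT mapping is checked and propagated
 * one level further down the walk than in the non-PAE case below.
 */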
static pmd_t *pgd_to_pmd_walk(pgd_t *pgd, unsigned long va)
{
	p4d_t *p4d;
	pud_t *pud;

	if (pgd->pgd == 0)
		return NULL;

	p4d = p4d_offset(pgd, va);
	if (p4d_none(*p4d))
		return NULL;

	pud = pud_offset(p4d, va);
	if (pud_none(*pud))
		return NULL;

	return pmd_offset(pud, va);
}

static void map_ldt_struct_to_user(struct mm_struct *mm)
{
	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
	pmd_t *k_pmd, *u_pmd;

	k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
	u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);

	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
		set_pmd(u_pmd, *k_pmd);
}

static void sanity_check_ldt_mapping(struct mm_struct *mm)
{
	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
	bool had_kernel, had_user;
	pmd_t *k_pmd, *u_pmd;

	k_pmd      = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
	u_pmd      = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
	had_kernel = (k_pmd->pmd != 0);
	had_user   = (u_pmd->pmd != 0);

	do_sanity_check(mm, had_kernel, had_user);
}

#else /* !CONFIG_X86_PAE */

static void map_ldt_struct_to_user(struct mm_struct *mm)
{
	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);

	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
		set_pgd(kernel_to_user_pgdp(pgd), *pgd);
}

static void sanity_check_ldt_mapping(struct mm_struct *mm)
{
	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
	bool had_kernel = (pgd->pgd != 0);
	bool had_user   = (kernel_to_user_pgdp(pgd)->pgd != 0);

	do_sanity_check(mm, had_kernel, had_user);
}

#endif /* CONFIG_X86_PAE */

/*
 * If PTI is enabled, this maps the LDT into the kernelmode and
 * usermode tables for the given mm.
 */
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
	unsigned long va;
	bool is_vmalloc;
	spinlock_t *ptl;
	int i, nr_pages;

	if (!boot_cpu_has(X86_FEATURE_PTI))
		return 0;

	/*
	 * Any given ldt_struct should have map_ldt_struct() called at most
	 * once.
	 */
	WARN_ON(ldt->slot != -1);

	/* Check if the current mappings are sane */
	sanity_check_ldt_mapping(mm);

	is_vmalloc = is_vmalloc_addr(ldt->entries);

	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);

	for (i = 0; i < nr_pages; i++) {
		unsigned long offset = i << PAGE_SHIFT;
		const void *src = (char *)ldt->entries + offset;
		unsigned long pfn;
		pgprot_t pte_prot;
		pte_t pte, *ptep;

		va = (unsigned long)ldt_slot_va(slot) + offset;
		pfn = is_vmalloc ? vmalloc_to_pfn(src) :
			page_to_pfn(virt_to_page(src));
		/*
		 * Treat the PTI LDT range as a *userspace* range.
		 * get_locked_pte() will allocate all needed pagetables
		 * and account for them in this mm.
		 */
		ptep = get_locked_pte(mm, va, &ptl);
		if (!ptep)
			return -ENOMEM;
		/*
		 * Map it RO so the easy-to-find address is not a primary
		 * target via some kernel interface which misses a
		 * permission check.
		 */
		pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
		/* Filter out unsupported __PAGE_KERNEL* bits: */
		pgprot_val(pte_prot) &= __supported_pte_mask;
		pte = pfn_pte(pfn, pte_prot);
		set_pte_at(mm, va, ptep, pte);
		pte_unmap_unlock(ptep, ptl);
	}

	/* Propagate LDT mapping to the user page-table */
	map_ldt_struct_to_user(mm);

	ldt->slot = slot;
	return 0;
}

static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
	unsigned long va;
	int i, nr_pages;

	if (!ldt)
		return;

	/* LDT map/unmap is only required for PTI */
	if (!boot_cpu_has(X86_FEATURE_PTI))
		return;

	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);

	for (i = 0; i < nr_pages; i++) {
		unsigned long offset = i << PAGE_SHIFT;
		spinlock_t *ptl;
		pte_t *ptep;

		va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
		ptep = get_locked_pte(mm, va, &ptl);
		if (!WARN_ON_ONCE(!ptep)) {
			pte_clear(mm, va, ptep);
			pte_unmap_unlock(ptep, ptl);
		}
	}

	va = (unsigned long)ldt_slot_va(ldt->slot);
	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
}

#else /* !CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */

static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
	return 0;
}

static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
}
#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */

static void free_ldt_pgtables(struct mm_struct *mm)
{
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
	struct mmu_gather tlb;
	unsigned long start = LDT_BASE_ADDR;
	unsigned long end = LDT_END_ADDR;

	if (!boot_cpu_has(X86_FEATURE_PTI))
		return;

	/*
	 * Although free_pgd_range() is intended for freeing user
	 * page-tables, it also works out for kernel mappings on x86.
	 * We use tlb_gather_mmu_fullmm() to avoid confusing the
	 * range-tracking logic in __tlb_adjust_range().
	 */
	tlb_gather_mmu_fullmm(&tlb, mm);
	free_pgd_range(&tlb, start, end, start, end);
	tlb_finish_mmu(&tlb);
#endif
}

/* After calling this, the LDT is immutable. */
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
}

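/* Publish the new LDT and make every CPU running this mm reload it. */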
static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
{
	mutex_lock(&mm->context.lock);

	/* Synchronizes with READ_ONCE in load_mm_ldt. */
	smp_store_release(&mm->context.ldt, ldt);

	/* Activate the LDT for all CPUs using current's mm. */
	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);

	mutex_unlock(&mm->context.lock);
}

static void free_ldt_struct(struct ldt_struct *ldt)
{
	if (likely(!ldt))
		return;

	paravirt_free_ldt(ldt->entries, ldt->nr_entries);
	if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
		vfree_atomic(ldt->entries);
	else
		free_page((unsigned long)ldt->entries);
	kfree(ldt);
}

/*
 * Called on fork from arch_dup_mmap(). Just copy the current LDT state;
 * the new task is not running, so nothing can be installed.
 */
int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
{
	struct ldt_struct *new_ldt;
	int retval = 0;

	if (!old_mm)
		return 0;

	mutex_lock(&old_mm->context.lock);
	if (!old_mm->context.ldt)
		goto out_unlock;

	new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
	if (!new_ldt) {
		retval = -ENOMEM;
		goto out_unlock;
	}

	memcpy(new_ldt->entries, old_mm->context.ldt->entries,
	       new_ldt->nr_entries * LDT_ENTRY_SIZE);
	finalize_ldt_struct(new_ldt);

	retval = map_ldt_struct(mm, new_ldt, 0);
	if (retval) {
		free_ldt_pgtables(mm);
		free_ldt_struct(new_ldt);
		goto out_unlock;
	}
	mm->context.ldt = new_ldt;

out_unlock:
	mutex_unlock(&old_mm->context.lock);
	return retval;
}

/*
 * No need to lock the MM as we are the last user
 *
 * 64bit: Don't touch the LDT register - we're already in the next thread.
 */
void destroy_context_ldt(struct mm_struct *mm)
{
	free_ldt_struct(mm->context.ldt);
	mm->context.ldt = NULL;
}

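/* Called when the address space is torn down: free the PTI LDT page-tables. */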
void ldt_arch_exit_mmap(struct mm_struct *mm)
{
	free_ldt_pgtables(mm);
}

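/* modify_ldt() func 0: copy the current LDT to userspace, zero-filling up to bytecount. */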
static int read_ldt(void __user *ptr, unsigned long bytecount)
{
	struct mm_struct *mm = current->mm;
	unsigned long entries_size;
	int retval;

	down_read(&mm->context.ldt_usr_sem);

	if (!mm->context.ldt) {
		retval = 0;
		goto out_unlock;
	}

	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;

	entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
	if (entries_size > bytecount)
		entries_size = bytecount;

	if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
		retval = -EFAULT;
		goto out_unlock;
	}

	if (entries_size != bytecount) {
		/* Zero-fill the rest and pretend we read bytecount bytes. */
		if (clear_user(ptr + entries_size, bytecount - entries_size)) {
			retval = -EFAULT;
			goto out_unlock;
		}
	}
	retval = bytecount;

out_unlock:
	up_read(&mm->context.ldt_usr_sem);
	return retval;
}

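/* modify_ldt() func 2: the default LDT is empty, so just zero-fill the user buffer. */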
static int read_default_ldt(void __user *ptr, unsigned long bytecount)
{
	/* CHECKME: Can we use _one_ random number? */
#ifdef CONFIG_X86_32
	unsigned long size = 5 * sizeof(struct desc_struct);
#else
	unsigned long size = 128;
#endif
	if (bytecount > size)
		bytecount = size;
	if (clear_user(ptr, bytecount))
		return -EFAULT;
	return bytecount;
}

static bool allow_16bit_segments(void)
{
	if (!IS_ENABLED(CONFIG_X86_16BIT))
		return false;

#ifdef CONFIG_XEN_PV
	/*
	 * Xen PV does not implement ESPFIX64, which means that 16-bit
	 * segments will not work correctly.  Until either Xen PV implements
	 * ESPFIX64 and can signal this fact to the guest or someone
	 * provides compelling evidence that allowing broken 16-bit segments
	 * is worthwhile, disallow 16-bit segments under Xen PV.
	 */
	if (xen_pv_domain()) {
		pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
		return false;
	}
#endif

	return true;
}

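/*
 * Install one LDT entry (or clear it).  A new ldt_struct is always allocated
 * and populated from the old one, then installed atomically, so concurrent
 * readers never see a partially written LDT.
 */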
static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
	struct mm_struct *mm = current->mm;
	struct ldt_struct *new_ldt, *old_ldt;
	unsigned int old_nr_entries, new_nr_entries;
	struct user_desc ldt_info;
	struct desc_struct ldt;
	int error;

	error = -EINVAL;
	if (bytecount != sizeof(ldt_info))
		goto out;
	error = -EFAULT;
	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
		goto out;

	error = -EINVAL;
	if (ldt_info.entry_number >= LDT_ENTRIES)
		goto out;
	if (ldt_info.contents == 3) {
		if (oldmode)
			goto out;
		if (ldt_info.seg_not_present == 0)
			goto out;
	}

	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
	    LDT_empty(&ldt_info)) {
		/* The user wants to clear the entry. */
		memset(&ldt, 0, sizeof(ldt));
	} else {
		if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
			error = -EINVAL;
			goto out;
		}

		fill_ldt(&ldt, &ldt_info);
		if (oldmode)
			ldt.avl = 0;
	}

	if (down_write_killable(&mm->context.ldt_usr_sem))
		return -EINTR;

	old_ldt       = mm->context.ldt;
	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
	new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);

	error = -ENOMEM;
	new_ldt = alloc_ldt_struct(new_nr_entries);
	if (!new_ldt)
		goto out_unlock;

	if (old_ldt)
		memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);

	new_ldt->entries[ldt_info.entry_number] = ldt;
	finalize_ldt_struct(new_ldt);

	/*
	 * If we are using PTI, map the new LDT into the userspace pagetables.
	 * If there is already an LDT, use the other slot so that other CPUs
	 * will continue to use the old LDT until install_ldt() switches
	 * them over to the new LDT.
	 */
	error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
	if (error) {
		/*
		 * This can only fail for the first LDT setup. If an LDT is
		 * already installed then the PTE page is already
		 * populated. Mop up a half populated page table.
		 */
		if (!WARN_ON_ONCE(old_ldt))
			free_ldt_pgtables(mm);
		free_ldt_struct(new_ldt);
		goto out_unlock;
	}

	install_ldt(mm, new_ldt);
	unmap_ldt_struct(mm, old_ldt);
	free_ldt_struct(old_ldt);
	error = 0;

out_unlock:
	up_write(&mm->context.ldt_usr_sem);
out:
	return error;
}

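/*
 * modify_ldt() multiplexer: func 0 reads the current LDT, func 1 writes an
 * entry using the legacy "oldmode" semantics, func 2 reads the default LDT
 * and func 0x11 writes an entry using the current semantics.
 */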
SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
		unsigned long , bytecount)
{
	int ret = -ENOSYS;

	switch (func) {
	case 0:
		ret = read_ldt(ptr, bytecount);
		break;
	case 1:
		ret = write_ldt(ptr, bytecount, 1);
		break;
	case 2:
		ret = read_default_ldt(ptr, bytecount);
		break;
	case 0x11:
		ret = write_ldt(ptr, bytecount, 0);
		break;
	}
	/*
	 * The SYSCALL_DEFINE() macros give us an 'unsigned long'
	 * return type, but the ABI for sys_modify_ldt() expects
	 * 'int'.  This cast gives us an int-sized value in %rax
	 * for the return code.  The 'unsigned' is necessary so
	 * the compiler does not try to sign-extend the negative
	 * return codes into the high half of the register when
	 * taking the value from int->long.
	 */
	return (unsigned int)ret;
}
|---|