| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | 
|---|
| 2 | #ifndef _ASM_X86_SEGMENT_H | 
|---|
| 3 | #define _ASM_X86_SEGMENT_H | 
|---|
| 4 |  | 
|---|
| 5 | #include <linux/const.h> | 
|---|
| 6 | #include <asm/alternative.h> | 
|---|
| 7 | #include <asm/ibt.h> | 
|---|
| 8 |  | 
|---|
/*
 * Constructor for a conventional segment GDT (or LDT) entry.
 * This is a macro so it can be used in initializers.
 *
 * Where each input field lands in the 64-bit result:
 *
 *   limit bits  0..15  ->  result bits  0..15
 *   base  bits  0..23  ->  result bits 16..39
 *   flags bits  0..7   ->  result bits 40..47
 *   limit bits 16..19  ->  result bits 48..51
 *   flags bits 12..15  ->  result bits 52..55
 *   base  bits 24..31  ->  result bits 56..63
 *
 * Bits 8..11 of @flags are masked off, because result bits 48..51 carry
 * the upper nibble of @limit instead.  Each argument is evaluated more
 * than once, so pass side-effect-free expressions only.
 */
#define GDT_ENTRY(flags, base, limit)			\
	((((base)  & _AC(0xff000000,ULL)) << (56-24)) |	\
	 (((flags) & _AC(0x0000f0ff,ULL)) << 40) |	\
	 (((limit) & _AC(0x000f0000,ULL)) << (48-16)) |	\
	 (((base)  & _AC(0x00ffffff,ULL)) << 16) |	\
	 (((limit) & _AC(0x0000ffff,ULL))))
|---|
| 19 |  | 
|---|
/*
 * Simple and small GDT entries for booting only.
 *
 * GDT_ENTRY_BOOT_* are descriptor indices.  Each __BOOT_* selector is the
 * matching index multiplied by 8, which leaves the low three selector
 * bits at zero.
 */

#define GDT_ENTRY_BOOT_CS	2
#define GDT_ENTRY_BOOT_DS	3
#define GDT_ENTRY_BOOT_TSS	4
#define __BOOT_CS		(GDT_ENTRY_BOOT_CS*8)
#define __BOOT_DS		(GDT_ENTRY_BOOT_DS*8)
#define __BOOT_TSS		(GDT_ENTRY_BOOT_TSS*8)
|---|
| 28 |  | 
|---|
/*
 * Bottom two bits of selector give the ring
 * privilege level
 */
#define SEGMENT_RPL_MASK	0x3

/*
 * When running on Xen PV, the actual privilege level of the kernel is 1,
 * not 0. Testing the Requested Privilege Level in a segment selector to
 * determine whether the context is user mode or kernel mode with
 * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level
 * matches the 0x3 mask (1 & 0x3 is non-zero).
 *
 * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV
 * kernels because privilege level 2 is never used: only level 3 has the
 * 0x2 bit set among the levels in use.
 */
#define USER_SEGMENT_RPL_MASK	0x2

/* User mode is privilege level 3: */
#define USER_RPL		0x3

/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
#define SEGMENT_TI_MASK		0x4
/* LDT segment has TI set ... */
#define SEGMENT_LDT		0x4
/* ... GDT has it cleared */
#define SEGMENT_GDT		0x0

/* GDT index 0 is the invalid segment: */
#define GDT_ENTRY_INVALID_SEG	0
|---|
| 58 |  | 
|---|
| 59 | #if defined(CONFIG_X86_32) && !defined(BUILD_VDSO32_64) | 
|---|
| 60 | /* | 
|---|
| 61 | * The layout of the per-CPU GDT under Linux: | 
|---|
| 62 | * | 
|---|
| 63 | *   0 - null								<=== cacheline #1 | 
|---|
| 64 | *   1 - reserved | 
|---|
| 65 | *   2 - reserved | 
|---|
| 66 | *   3 - reserved | 
|---|
| 67 | * | 
|---|
| 68 | *   4 - unused								<=== cacheline #2 | 
|---|
| 69 | *   5 - unused | 
|---|
| 70 | * | 
|---|
| 71 | *  ------- start of TLS (Thread-Local Storage) segments: | 
|---|
| 72 | * | 
|---|
| 73 | *   6 - TLS segment #1			[ glibc's TLS segment ] | 
|---|
| 74 | *   7 - TLS segment #2			[ Wine's %fs Win32 segment ] | 
|---|
| 75 | *   8 - TLS segment #3							<=== cacheline #3 | 
|---|
| 76 | *   9 - reserved | 
|---|
| 77 | *  10 - reserved | 
|---|
| 78 | *  11 - reserved | 
|---|
| 79 | * | 
|---|
| 80 | *  ------- start of kernel segments: | 
|---|
| 81 | * | 
|---|
| 82 | *  12 - kernel code segment						<=== cacheline #4 | 
|---|
| 83 | *  13 - kernel data segment | 
|---|
| 84 | *  14 - default user CS | 
|---|
| 85 | *  15 - default user DS | 
|---|
| 86 | *  16 - TSS								<=== cacheline #5 | 
|---|
| 87 | *  17 - LDT | 
|---|
| 88 | *  18 - PNPBIOS support (16->32 gate) | 
|---|
| 89 | *  19 - PNPBIOS support | 
|---|
| 90 | *  20 - PNPBIOS support						<=== cacheline #6 | 
|---|
| 91 | *  21 - PNPBIOS support | 
|---|
| 92 | *  22 - PNPBIOS support | 
|---|
| 93 | *  23 - APM BIOS support | 
|---|
| 94 | *  24 - APM BIOS support						<=== cacheline #7 | 
|---|
| 95 | *  25 - APM BIOS support | 
|---|
| 96 | * | 
|---|
| 97 | *  26 - ESPFIX small SS | 
|---|
| 98 | *  27 - per-cpu			[ offset to per-cpu data area ] | 
|---|
| 99 | *  28 - VDSO getcpu | 
|---|
| 100 | *  29 - unused | 
|---|
| 101 | *  30 - unused | 
|---|
| 102 | *  31 - TSS for double fault handler | 
|---|
| 103 | */ | 
|---|
/* TLS descriptors: GDT_ENTRY_TLS_ENTRIES consecutive slots from TLS_MIN. */
#define GDT_ENTRY_TLS_MIN		6
#define GDT_ENTRY_TLS_MAX 		(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)

/* Kernel and default user segments, TSS, LDT and BIOS support slots: */
#define GDT_ENTRY_KERNEL_CS		12
#define GDT_ENTRY_KERNEL_DS		13
#define GDT_ENTRY_DEFAULT_USER_CS	14
#define GDT_ENTRY_DEFAULT_USER_DS	15
#define GDT_ENTRY_TSS			16
#define GDT_ENTRY_LDT			17
#define GDT_ENTRY_PNPBIOS_CS32		18
#define GDT_ENTRY_PNPBIOS_CS16		19
#define GDT_ENTRY_PNPBIOS_DS		20
#define GDT_ENTRY_PNPBIOS_TS1		21
#define GDT_ENTRY_PNPBIOS_TS2		22
/* First of the APM BIOS slots: */
#define GDT_ENTRY_APMBIOS_BASE		23

#define GDT_ENTRY_ESPFIX_SS		26
#define GDT_ENTRY_PERCPU		27
#define GDT_ENTRY_CPUNODE		28

#define GDT_ENTRY_DOUBLEFAULT_TSS	31

/*
 * Number of entries in the GDT table:
 */
#define GDT_ENTRIES			32

/*
 * Segment selector values corresponding to the above entries.
 * A selector is the GDT index times 8; user-space selectors additionally
 * carry RPL 3 in the low two bits (the "+ 3").
 */

#define __KERNEL_CS			(GDT_ENTRY_KERNEL_CS*8)
#define __KERNEL_DS			(GDT_ENTRY_KERNEL_DS*8)
#define __USER_DS			(GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
#define __USER_CS			(GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
/* No separate 32-bit user code segment here; alias the default one: */
#define __USER32_CS			__USER_CS
#define __ESPFIX_SS			(GDT_ENTRY_ESPFIX_SS*8)

/* segment for calling fn: */
#define PNP_CS32			(GDT_ENTRY_PNPBIOS_CS32*8)
/* code segment for BIOS: */
#define PNP_CS16			(GDT_ENTRY_PNPBIOS_CS16*8)

/*
 * "Is this PNP code selector (PNP_CS32 or PNP_CS16)?"
 *
 * The 0xf4 mask drops bit 3, the only bit in which PNP_CS32 (0x90) and
 * PNP_CS16 (0x98) differ, and also drops the two RPL bits.  Bits above
 * bit 7 are ignored as well; the TI bit (bit 2) must be clear to match.
 */
#define SEGMENT_IS_PNP_CODE(x)		(((x) & 0xf4) == PNP_CS32)

/* data segment for BIOS: */
#define PNP_DS				(GDT_ENTRY_PNPBIOS_DS*8)
/* transfer data segment: */
#define PNP_TS1				(GDT_ENTRY_PNPBIOS_TS1*8)
/* another data segment: */
#define PNP_TS2				(GDT_ENTRY_PNPBIOS_TS2*8)

/* The per-cpu selector is only non-zero on SMP builds: */
#ifdef CONFIG_SMP
# define __KERNEL_PERCPU		(GDT_ENTRY_PERCPU*8)
#else
# define __KERNEL_PERCPU		0
#endif

/* Selector for the CPU/node descriptor, with RPL 3: */
#define __CPUNODE_SEG			(GDT_ENTRY_CPUNODE*8 + 3)
|---|
| 164 |  | 
|---|
| 165 | #else /* 64-bit: */ | 
|---|
| 166 |  | 
|---|
| 167 | #include <asm/cache.h> | 
|---|
| 168 |  | 
|---|
/* Kernel code (32-bit and default) and data descriptors: */
#define GDT_ENTRY_KERNEL32_CS		1
#define GDT_ENTRY_KERNEL_CS		2
#define GDT_ENTRY_KERNEL_DS		3

/*
 * We cannot use the same code segment descriptor for user and kernel mode,
 * not even in long flat mode, because of different DPL.
 *
 * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
 * selectors:
 *
 *   if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
 *   if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
 *
 * ss = STAR.SYSRET_CS+8 (in either case)
 *
 * thus USER_DS should be between 32-bit and 64-bit code selectors:
 */
#define GDT_ENTRY_DEFAULT_USER32_CS	4
#define GDT_ENTRY_DEFAULT_USER_DS	5
#define GDT_ENTRY_DEFAULT_USER_CS	6

/* Needs two entries */
#define GDT_ENTRY_TSS			8
/* Needs two entries */
#define GDT_ENTRY_LDT			10

#define GDT_ENTRY_TLS_MIN		12
#define GDT_ENTRY_TLS_MAX		14

#define GDT_ENTRY_CPUNODE		15

/*
 * Number of entries in the GDT table:
 */
#define GDT_ENTRIES			16

/*
 * Segment selector values corresponding to the above entries:
 *
 * Note, selectors also need to have a correct RPL,
 * expressed with the +3 value for user-space selectors:
 */
#define __KERNEL32_CS			(GDT_ENTRY_KERNEL32_CS*8)
#define __KERNEL_CS			(GDT_ENTRY_KERNEL_CS*8)
#define __KERNEL_DS			(GDT_ENTRY_KERNEL_DS*8)
#define __USER32_CS			(GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
#define __USER_DS			(GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
#define __USER_CS			(GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
#define __CPUNODE_SEG			(GDT_ENTRY_CPUNODE*8 + 3)
|---|
| 219 |  | 
|---|
| 220 | #endif | 
|---|
| 221 |  | 
|---|
#define IDT_ENTRIES			256
#define NUM_EXCEPTION_VECTORS		32

/*
 * Bitmask of exception vectors which push an error code on the stack:
 * bit N set means vector N does.  The value has bits 8, 10..14, 17 and
 * 29 set.
 */
#define EXCEPTION_ERRCODE_MASK		0x20027d00

/* Each GDT entry, and therefore each TLS entry, is 8 bytes: */
#define GDT_SIZE			(GDT_ENTRIES*8)
#define GDT_ENTRY_TLS_ENTRIES		3
#define TLS_SIZE			(GDT_ENTRY_TLS_ENTRIES* 8)

/* Bit size and mask of CPU number stored in the per CPU data (and TSC_AUX) */
#define VDSO_CPUNODE_BITS		12
#define VDSO_CPUNODE_MASK		0xfff
|---|
| 235 |  | 
|---|
| 236 | #ifndef __ASSEMBLER__ | 
|---|
| 237 |  | 
|---|
| 238 | /* Helper functions to store/load CPU and node numbers */ | 
|---|
| 239 |  | 
|---|
| 240 | static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node) | 
|---|
| 241 | { | 
|---|
| 242 | return (node << VDSO_CPUNODE_BITS) | cpu; | 
|---|
| 243 | } | 
|---|
| 244 |  | 
|---|
| 245 | static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) | 
|---|
| 246 | { | 
|---|
| 247 | unsigned long p; | 
|---|
| 248 |  | 
|---|
| 249 | /* | 
|---|
| 250 | * Load CPU and node number from the GDT.  LSL is faster than RDTSCP | 
|---|
| 251 | * and works on all CPUs.  This is volatile so that it orders | 
|---|
| 252 | * correctly with respect to barrier() and to keep GCC from cleverly | 
|---|
| 253 | * hoisting it out of the calling function. | 
|---|
| 254 | * | 
|---|
| 255 | * If RDPID is available, use it. | 
|---|
| 256 | */ | 
|---|
| 257 | alternative_io ( "lsl %[seg],%k[p]", | 
|---|
| 258 | "rdpid %[p]", | 
|---|
| 259 | X86_FEATURE_RDPID, | 
|---|
| 260 | [p] "=r"(p), [seg] "r"(__CPUNODE_SEG)); | 
|---|
| 261 |  | 
|---|
| 262 | if (cpu) | 
|---|
| 263 | *cpu = (p & VDSO_CPUNODE_MASK); | 
|---|
| 264 | if (node) | 
|---|
| 265 | *node = (p >> VDSO_CPUNODE_BITS); | 
|---|
| 266 | } | 
|---|
| 267 |  | 
|---|
| 268 | #endif /* !__ASSEMBLER__ */ | 
|---|
| 269 |  | 
|---|
| 270 | #ifdef __KERNEL__ | 
|---|
| 271 |  | 
|---|
/*
 * early_idt_handler_array is an array of entry points referenced in the
 * early IDT.  For simplicity, it's a real array with fixed-size entries.
 * Nine bytes of each entry leave room for an optional 'push $0' if the
 * vector has no error code (two bytes), a 'push $vector_number' (two
 * bytes), and a jump to the common entry code (up to five bytes).
 *
 * Each entry is ENDBR_INSN_SIZE bytes larger than that.
 * NOTE(review): presumably this makes room for an ENDBR instruction at
 * the entry point when IBT is enabled -- confirm against <asm/ibt.h>.
 */
#define EARLY_IDT_HANDLER_SIZE (9 + ENDBR_INSN_SIZE)

/*
 * xen_early_idt_handler_array is for Xen pv guests: for each entry in
 * early_idt_handler_array it contains a prequel in the form of
 * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to
 * max 8 bytes, plus the same ENDBR_INSN_SIZE allowance per entry.
 */
#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE)
|---|
| 288 |  | 
|---|
| 289 | #ifndef __ASSEMBLER__ | 
|---|
| 290 |  | 
|---|
| 291 | extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; | 
|---|
| 292 | extern void early_ignore_irq(void); | 
|---|
| 293 |  | 
|---|
| 294 | #ifdef CONFIG_XEN_PV | 
|---|
| 295 | extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE]; | 
|---|
| 296 | #endif | 
|---|
| 297 |  | 
|---|
/*
 * Load a segment. Fall back on loading the zero segment if something goes
 * wrong.  This variant assumes that loading zero fully clears the segment.
 * This is always the case on Intel CPUs and, even on 64-bit AMD CPUs, any
 * failure to fully clear the cached descriptor is only observable for
 * FS and GS.
 *
 * @seg is pasted into the instruction text, so it must be a bare segment
 * register name (ss, ds, ...).  @value is copied into a local unsigned
 * short first, so it is evaluated exactly once and truncated to 16 bits.
 *
 * The exception-table entry names label 1 as both the faulting and the
 * resume address, with type EX_TYPE_ZERO_REG on the value register.
 * NOTE(review): presumably the fixup zeroes that register and re-runs the
 * mov, which gives the "load zero" fallback described above -- confirm
 * against the extable handler.
 */
#define __loadsegment_simple(seg, value)				\
do {									\
	unsigned short __val = (value);					\
									\
	asm volatile("						\n"	\
		     "1:	movl %k0,%%" #seg "		\n"	\
		     _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k0)\
		     : "+r" (__val) : : "memory");			\
} while (0)
|---|
| 314 |  | 
|---|
/*
 * Per-register front ends for __loadsegment_simple().  The names follow
 * the __loadsegment_<seg> pattern that loadsegment() pastes together.
 */
#define __loadsegment_ss(value) __loadsegment_simple(ss, (value))
#define __loadsegment_ds(value) __loadsegment_simple(ds, (value))
#define __loadsegment_es(value) __loadsegment_simple(es, (value))
|---|
| 318 |  | 
|---|
| 319 | #ifdef CONFIG_X86_32 | 
|---|
| 320 |  | 
|---|
/*
 * On 32-bit systems, the hidden parts of FS and GS are unobservable if
 * the selector is NULL, so there's no funny business here: both simply
 * go through __loadsegment_simple().
 */
#define __loadsegment_fs(value) __loadsegment_simple(fs, (value))
#define __loadsegment_gs(value) __loadsegment_simple(gs, (value))
|---|
| 327 |  | 
|---|
| 328 | #else | 
|---|
| 329 |  | 
|---|
| 330 | static inline void __loadsegment_fs(unsigned short value) | 
|---|
| 331 | { | 
|---|
| 332 | asm volatile( "						\n" | 
|---|
| 333 | "1:	movw %0, %%fs			\n" | 
|---|
| 334 | "2:					\n" | 
|---|
| 335 |  | 
|---|
| 336 | _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS) | 
|---|
| 337 |  | 
|---|
| 338 | : : "rm"(value) : "memory"); | 
|---|
| 339 | } | 
|---|
| 340 |  | 
|---|
| 341 | /* __loadsegment_gs is intentionally undefined.  Use load_gs_index instead. */ | 
|---|
| 342 |  | 
|---|
| 343 | #endif | 
|---|
| 344 |  | 
|---|
/*
 * loadsegment(seg, value) - dispatch to __loadsegment_<seg>(value).
 *
 * @seg is token-pasted onto "__loadsegment_", so it must be a bare
 * register name for which such a helper exists; any other name is a
 * build error.
 */
#define loadsegment(seg, value) __loadsegment_ ## seg (value)
|---|
| 346 |  | 
|---|
/*
 * Save a segment register away:
 *
 * @seg is pasted into the instruction text (bare register name); @value
 * must be an lvalue, as it is the asm output operand and is placed in a
 * general-purpose register ("=r").  The statement carries a "memory"
 * clobber.
 */
#define savesegment(seg, value)				\
	asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
|---|
| 352 |  | 
|---|
| 353 | #endif /* !__ASSEMBLER__ */ | 
|---|
| 354 | #endif /* __KERNEL__ */ | 
|---|
| 355 |  | 
|---|
| 356 | #endif /* _ASM_X86_SEGMENT_H */ | 
|---|
| 357 |  | 
|---|