| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | 
|---|
| 2 |  | 
|---|
| 3 | #ifndef _ASM_X86_NOSPEC_BRANCH_H_ | 
|---|
| 4 | #define _ASM_X86_NOSPEC_BRANCH_H_ | 
|---|
| 5 |  | 
|---|
| 6 | #include <linux/static_key.h> | 
|---|
| 7 | #include <linux/objtool.h> | 
|---|
| 8 | #include <linux/linkage.h> | 
|---|
| 9 |  | 
|---|
| 10 | #include <asm/alternative.h> | 
|---|
| 11 | #include <asm/cpufeatures.h> | 
|---|
| 12 | #include <asm/msr-index.h> | 
|---|
| 13 | #include <asm/unwind_hints.h> | 
|---|
| 14 | #include <asm/percpu.h> | 
|---|
| 15 |  | 
|---|
| 16 | /* | 
|---|
| 17 | * Call depth tracking for Intel SKL CPUs to address the RSB underflow | 
|---|
| 18 | * issue in software. | 
|---|
| 19 | * | 
|---|
| 20 | * The tracking does not use a counter. It uses arithmetic shift | 
|---|
| 21 | * right on call entry and logical shift left on return. | 
|---|
| 22 | * | 
|---|
| 23 | * The depth tracking variable is initialized to 0x8000.... when the call | 
|---|
| 24 | * depth is zero. The arithmetic shift right sign extends the MSB and | 
|---|
| 25 | * saturates after the 12th call. The shift count is 5 for both directions | 
|---|
| 26 | * so the tracking covers 12 nested calls. | 
|---|
| 27 | * | 
|---|
| 28 | *  Call | 
|---|
| 29 | *  0: 0x8000000000000000	0x0000000000000000 | 
|---|
| 30 | *  1: 0xfc00000000000000	0xf000000000000000 | 
|---|
| 31 | * ... | 
|---|
| 32 | * 11: 0xfffffffffffffff8	0xfffffffffffffc00 | 
|---|
| 33 | * 12: 0xffffffffffffffff	0xffffffffffffffe0 | 
|---|
| 34 | * | 
|---|
| 35 | * After a return buffer fill the depth is credited 12 calls before the | 
|---|
| 36 | * next stuffing has to take place. | 
|---|
| 37 | * | 
|---|
| 38 | * There is an inaccuracy for situations like this: | 
|---|
| 39 | * | 
|---|
| 40 | *  10 calls | 
|---|
| 41 | *   5 returns | 
|---|
| 42 | *   3 calls | 
|---|
| 43 | *   4 returns | 
|---|
| 44 | *   3 calls | 
|---|
| 45 | *   .... | 
|---|
| 46 | * | 
|---|
| 47 | * The shift count might cause this to be off by one in either direction, | 
|---|
| 48 | * but there is still a cushion vs. the RSB depth. The algorithm does not | 
|---|
| 49 | * claim to be perfect and it can be speculated around by the CPU, but it | 
|---|
| 50 | * is considered that it obfuscates the problem enough to make exploitation | 
|---|
| 51 | * extremely difficult. | 
|---|
| 52 | */ | 
|---|
| 53 | #define RET_DEPTH_SHIFT			5	/* bits shifted per accounted call or return */ | 
|---|
| 54 | #define RSB_RET_STUFF_LOOPS		16	/* NOTE(review): not referenced in this header; presumably the stuffing loop count of the return thunk - confirm */ | 
|---|
| 55 | #define RET_DEPTH_INIT			0x8000000000000000ULL	/* call depth zero: only the MSB is set */ | 
|---|
| 56 | #define RET_DEPTH_INIT_FROM_CALL	0xfc00000000000000ULL	/* RET_DEPTH_INIT after one arithmetic shift right by RET_DEPTH_SHIFT */ | 
|---|
| 57 | #define RET_DEPTH_CREDIT		0xffffffffffffffffULL	/* all bits set: the saturated value credited after a return buffer fill */ | 
|---|
| 58 |  | 
|---|
| 59 | #ifdef CONFIG_CALL_THUNKS_DEBUG	/* debug build: each helper increments one per-CPU statistics counter */ | 
|---|
| 60 | # define CALL_THUNKS_DEBUG_INC_CALLS				\ | 
|---|
| 61 | incq	PER_CPU_VAR(__x86_call_count); | 
|---|
| 62 | # define CALL_THUNKS_DEBUG_INC_RETS				\ | 
|---|
| 63 | incq	PER_CPU_VAR(__x86_ret_count); | 
|---|
| 64 | # define CALL_THUNKS_DEBUG_INC_STUFFS				\ | 
|---|
| 65 | incq	PER_CPU_VAR(__x86_stuffs_count); | 
|---|
| 66 | # define CALL_THUNKS_DEBUG_INC_CTXSW				\ | 
|---|
| 67 | incq	PER_CPU_VAR(__x86_ctxsw_count); | 
|---|
| 68 | #else	/* !CONFIG_CALL_THUNKS_DEBUG: the helpers expand to nothing */ | 
|---|
| 69 | # define CALL_THUNKS_DEBUG_INC_CALLS | 
|---|
| 70 | # define CALL_THUNKS_DEBUG_INC_RETS | 
|---|
| 71 | # define CALL_THUNKS_DEBUG_INC_STUFFS | 
|---|
| 72 | # define CALL_THUNKS_DEBUG_INC_CTXSW | 
|---|
| 73 | #endif | 
|---|
| 74 |  | 
|---|
| 75 | #if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) | 
|---|
| 76 | /* Real implementations: each helper expands to asm that updates the per-CPU __x86_call_depth. */ | 
|---|
| 77 | #include <asm/asm-offsets.h> | 
|---|
| 78 | /* Saturate the tracker (all bits set); __FILL_RETURN_BUFFER uses this after filling the RSB. */ | 
|---|
| 79 | #define CREDIT_CALL_DEPTH					\ | 
|---|
| 80 | movq	$-1, PER_CPU_VAR(__x86_call_depth); | 
|---|
| 81 | /* Reset the tracker to the depth-zero pattern (only bit 63 set). Clobbers %rax. */ | 
|---|
| 82 | #define RESET_CALL_DEPTH					\ | 
|---|
| 83 | xor	%eax, %eax;					\ | 
|---|
| 84 | bts	$63, %rax;					\ | 
|---|
| 85 | movq	%rax, PER_CPU_VAR(__x86_call_depth); | 
|---|
| 86 | /* Reset the tracker to the one-call pattern 0xfc00...: 0xfc is loaded into %al and shifted into the top byte. Clobbers %rax. Also counts one call in debug builds. */ | 
|---|
| 87 | #define RESET_CALL_DEPTH_FROM_CALL				\ | 
|---|
| 88 | movb	$0xfc, %al;					\ | 
|---|
| 89 | shl	$56, %rax;					\ | 
|---|
| 90 | movq	%rax, PER_CPU_VAR(__x86_call_depth);		\ | 
|---|
| 91 | CALL_THUNKS_DEBUG_INC_CALLS | 
|---|
| 92 | /* Account one call: arithmetic shift right of the tracker in memory. The literal 5 equals RET_DEPTH_SHIFT. */ | 
|---|
| 93 | #define INCREMENT_CALL_DEPTH					\ | 
|---|
| 94 | sarq	$5, PER_CPU_VAR(__x86_call_depth);		\ | 
|---|
| 95 | CALL_THUNKS_DEBUG_INC_CALLS | 
|---|
| 96 | /* Tracking not configured, or COMPILE_OFFSETS is defined: all four helpers expand to nothing. */ | 
|---|
| 97 | #else | 
|---|
| 98 | #define CREDIT_CALL_DEPTH | 
|---|
| 99 | #define RESET_CALL_DEPTH | 
|---|
| 100 | #define RESET_CALL_DEPTH_FROM_CALL | 
|---|
| 101 | #define INCREMENT_CALL_DEPTH | 
|---|
| 102 | #endif | 
|---|
| 103 |  | 
|---|
| 104 | /* | 
|---|
| 105 | * Fill the CPU return stack buffer. | 
|---|
| 106 | * | 
|---|
| 107 | * Each entry in the RSB, if used for a speculative 'ret', contains an | 
|---|
| 108 | * infinite 'pause; lfence; jmp' loop to capture speculative execution. | 
|---|
| 109 | * | 
|---|
| 110 | * This is required in various cases for retpoline and IBRS-based | 
|---|
| 111 | * mitigations for the Spectre variant 2 vulnerability. Sometimes to | 
|---|
| 112 | * eliminate potentially bogus entries from the RSB, and sometimes | 
|---|
| 113 | * purely to ensure that it doesn't get empty, which on some CPUs would | 
|---|
| 114 | * allow predictions from other (unwanted!) sources to be used. | 
|---|
| 115 | * | 
|---|
| 116 | * We define a CPP macro such that it can be used from both .S files and | 
|---|
| 117 | * inline assembly. It's possible to do a .macro and then include that | 
|---|
| 118 | * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. | 
|---|
| 119 | */ | 
|---|
| 120 |  | 
|---|
| 121 | #define RETPOLINE_THUNK_SIZE	32 | 
|---|
| 122 | #define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */ | 
|---|
| 123 |  | 
|---|
| 124 | /* | 
|---|
| 125 | * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. | 
|---|
| 126 | */ | 
|---|
| 127 | #define __FILL_RETURN_SLOT			\ | 
|---|
| 128 | ANNOTATE_INTRA_FUNCTION_CALL;		\ | 
|---|
| 129 | call	772f;				\ | 
|---|
| 130 | int3;					\ | 
|---|
| 131 | 772: | 
|---|
| 132 |  | 
|---|
| 133 | /* | 
|---|
| 134 | * Stuff the entire RSB. | 
|---|
| 135 | * | 
|---|
| 136 | * Google experimented with loop-unrolling and this turned out to be | 
|---|
| 137 | * the optimal version - two calls, each with their own speculation | 
|---|
| 138 | * trap should their return address end up getting used, in a loop. | 
|---|
| 139 | */ | 
|---|
| 140 | #ifdef CONFIG_X86_64	/* 64-bit: loop nr/2 times with two slots per pass; reg is clobbered as the loop counter */ | 
|---|
| 141 | #define __FILL_RETURN_BUFFER(reg, nr)			\ | 
|---|
| 142 | mov	$(nr/2), reg;				\ | 
|---|
| 143 | 771:							\ | 
|---|
| 144 | __FILL_RETURN_SLOT				\ | 
|---|
| 145 | __FILL_RETURN_SLOT				\ | 
|---|
| 146 | add	$(BITS_PER_LONG/8) * 2, %_ASM_SP;	/* drop the two return addresses pushed by the calls above */	\ | 
|---|
| 147 | dec	reg;					\ | 
|---|
| 148 | jnz	771b;					\ | 
|---|
| 149 | /* barrier for jnz misprediction */		\ | 
|---|
| 150 | lfence;						\ | 
|---|
| 151 | CREDIT_CALL_DEPTH				\ | 
|---|
| 152 | CALL_THUNKS_DEBUG_INC_CTXSW | 
|---|
| 153 | #else | 
|---|
| 154 | /* | 
|---|
| 155 | * i386 doesn't unconditionally have LFENCE, as such it can't | 
|---|
| 156 | * do a loop. The nr slots are emitted back to back with .rept and all nr return addresses are dropped by one final add; reg is not used. | 
|---|
| 157 | */ | 
|---|
| 158 | #define __FILL_RETURN_BUFFER(reg, nr)			\ | 
|---|
| 159 | .rept nr;					\ | 
|---|
| 160 | __FILL_RETURN_SLOT;				\ | 
|---|
| 161 | .endr;						\ | 
|---|
| 162 | add	$(BITS_PER_LONG/8) * nr, %_ASM_SP; | 
|---|
| 163 | #endif | 
|---|
| 164 |  | 
|---|
| 165 | /* | 
|---|
| 166 | * Stuff a single RSB slot. | 
|---|
| 167 | * | 
|---|
| 168 | * To mitigate Post-Barrier RSB speculation, one CALL instruction must be | 
|---|
| 169 | * forced to retire before letting a RET instruction execute. | 
|---|
| 170 | * | 
|---|
| 171 | * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed | 
|---|
| 172 | * before this point. | 
|---|
| 173 | */ | 
|---|
| 174 | #define __FILL_ONE_RETURN				\ | 
|---|
| 175 | __FILL_RETURN_SLOT				\ | 
|---|
| 176 | add	$(BITS_PER_LONG/8), %_ASM_SP;		\ | 
|---|
| 177 | lfence; | 
|---|
| 178 |  | 
|---|
| 179 | #ifdef __ASSEMBLER__ | 
|---|
| 180 |  | 
|---|
| 181 | /* | 
|---|
| 182 | * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions | 
|---|
| 183 | * vs RETBleed validation. | 
|---|
| 184 | */ | 
|---|
| 185 | #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE | 
|---|
| 186 |  | 
|---|
| 187 | /* | 
|---|
| 188 | * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should | 
|---|
| 189 | * eventually turn into its own annotation. | 
|---|
| 190 | */ | 
|---|
| 191 | .macro VALIDATE_UNRET_END | 
|---|
| 192 | #if defined(CONFIG_NOINSTR_VALIDATION) && \ | 
|---|
| 193 | (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) | 
|---|
| 194 | ANNOTATE_RETPOLINE_SAFE | 
|---|
| 195 | nop | 
|---|
| 196 | #endif | 
|---|
| 197 | .endm | 
|---|
| 198 |  | 
|---|
| 199 | /* | 
|---|
| 200 | * Emits a conditional CS prefix that is compatible with | 
|---|
| 201 | * -mindirect-branch-cs-prefix. | 
|---|
| 202 | */ | 
|---|
| 203 | .macro __CS_PREFIX reg:req | 
|---|
| 204 | .irp rs,r8,r9,r10,r11,r12,r13,r14,r15 | 
|---|
| 205 | .ifc \reg,\rs | 
|---|
| 206 | .byte 0x2e | 
|---|
| 207 | .endif | 
|---|
| 208 | .endr | 
|---|
| 209 | .endm | 
|---|
| 210 |  | 
|---|
| 211 | /* | 
|---|
| 212 | * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple | 
|---|
| 213 | * indirect jmp/call which may be susceptible to the Spectre variant 2 | 
|---|
| 214 | * attack. | 
|---|
| 215 | * | 
|---|
| 216 | * NOTE: these do not take kCFI into account and are thus not comparable to C | 
|---|
| 217 | * indirect calls, take care when using. The target of these should be an ENDBR | 
|---|
| 218 | * instruction irrespective of kCFI. | 
|---|
| 219 | */ | 
|---|
| 220 | .macro JMP_NOSPEC reg:req | 
|---|
| 221 | #ifdef CONFIG_MITIGATION_RETPOLINE | 
|---|
| 222 | __CS_PREFIX \reg | 
|---|
| 223 | jmp	__x86_indirect_thunk_\reg | 
|---|
| 224 | #else | 
|---|
| 225 | jmp	*%\reg | 
|---|
| 226 | int3 | 
|---|
| 227 | #endif | 
|---|
| 228 | .endm | 
|---|
| 229 |  | 
|---|
| 230 | .macro CALL_NOSPEC reg:req | 
|---|
| 231 | #ifdef CONFIG_MITIGATION_RETPOLINE | 
|---|
| 232 | __CS_PREFIX \reg | 
|---|
| 233 | call	__x86_indirect_thunk_\reg | 
|---|
| 234 | #else | 
|---|
| 235 | call	*%\reg | 
|---|
| 236 | #endif | 
|---|
| 237 | .endm | 
|---|
| 238 |  | 
|---|
| 239 | /* | 
|---|
| 240 | * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP | 
|---|
| 241 | * monstrosity above, manually. The default code jumps over the whole sequence; @ftr selects __FILL_RETURN_BUFFER(@reg, @nr); @ftr2 selects two NOPs followed by a single __FILL_ONE_RETURN. | 
|---|
| 242 | */ | 
|---|
| 243 | .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) | 
|---|
| 244 | ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ | 
|---|
| 245 | __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ | 
|---|
| 246 | __stringify(nop;nop;__FILL_ONE_RETURN), \ftr2 | 
|---|
| 247 | /* Target of the default jmp above. */ | 
|---|
| 248 | .Lskip_rsb_\@: | 
|---|
| 249 | .endm | 
|---|
| 250 |  | 
|---|
| 251 | /* | 
|---|
| 252 | * The CALL to srso_alias_untrain_ret() must be patched in directly at | 
|---|
| 253 | * the spot where untraining must be done, ie., srso_alias_untrain_ret() | 
|---|
| 254 | * must be the target of a CALL instruction instead of indirectly | 
|---|
| 255 | * jumping to a wrapper which then calls it. Therefore, this macro is | 
|---|
| 256 | * called outside of __UNTRAIN_RET below, for the time being, before the | 
|---|
| 257 | * kernel can support nested alternatives with arbitrary nesting. | 
|---|
| 258 | */ | 
|---|
| 259 | .macro CALL_UNTRAIN_RET | 
|---|
| 260 | #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) | 
|---|
| 261 | ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ | 
|---|
| 262 | "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS | 
|---|
| 263 | #endif | 
|---|
| 264 | .endm | 
|---|
| 265 |  | 
|---|
| 266 | /* | 
|---|
| 267 | * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the | 
|---|
| 268 | * return thunk isn't mapped into the userspace tables (then again, AMD | 
|---|
| 269 | * typically has NO_MELTDOWN). | 
|---|
| 270 | * | 
|---|
| 271 | * While retbleed_untrain_ret() doesn't clobber anything but requires stack, | 
|---|
| 272 | * write_ibpb() will clobber AX, CX, DX. | 
|---|
| 273 | * | 
|---|
| 274 | * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point | 
|---|
| 275 | * where we have a stack but before any RET instruction. | 
|---|
| 276 | */ | 
|---|
| 277 | .macro __UNTRAIN_RET ibpb_feature, call_depth_insns | 
|---|
| 278 | #if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY)	/* otherwise the macro body is empty */ | 
|---|
| 279 | VALIDATE_UNRET_END | 
|---|
| 280 | CALL_UNTRAIN_RET | 
|---|
| 281 | ALTERNATIVE_2 "",						\ | 
|---|
| 282 | "call write_ibpb", \ibpb_feature,			\ | 
|---|
| 283 | __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH | 
|---|
| 284 | #endif | 
|---|
| 285 | .endm | 
|---|
| 286 | /* Entry variant: IBPB keyed on X86_FEATURE_ENTRY_IBPB; the call depth is reset with RESET_CALL_DEPTH. */ | 
|---|
| 287 | #define UNTRAIN_RET \ | 
|---|
| 288 | __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH) | 
|---|
| 289 | /* VM-exit variant: IBPB keyed on X86_FEATURE_IBPB_ON_VMEXIT; same call depth reset. */ | 
|---|
| 290 | #define UNTRAIN_RET_VM \ | 
|---|
| 291 | __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH) | 
|---|
| 292 | /* Like UNTRAIN_RET but resets with RESET_CALL_DEPTH_FROM_CALL; presumably for entry code that was itself reached by a CALL - confirm against callers. */ | 
|---|
| 293 | #define UNTRAIN_RET_FROM_CALL \ | 
|---|
| 294 | __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL) | 
|---|
| 295 |  | 
|---|
| 296 | /* Account one call in the depth tracker: INCREMENT_CALL_DEPTH is patched in only when X86_FEATURE_CALL_DEPTH is set, and the macro is empty without CONFIG_MITIGATION_CALL_DEPTH_TRACKING. */ | 
|---|
| 297 | .macro CALL_DEPTH_ACCOUNT | 
|---|
| 298 | #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING | 
|---|
| 299 | ALTERNATIVE "",							\ | 
|---|
| 300 | __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH | 
|---|
| 301 | #endif | 
|---|
| 302 | .endm | 
|---|
| 303 |  | 
|---|
| 304 | /* | 
|---|
| 305 | * Macro to execute VERW insns that mitigate transient data sampling | 
|---|
| 306 | * attacks such as MDS or TSA. On affected systems a microcode update | 
|---|
| 307 | * overloaded VERW insns to also clear the CPU buffers. VERW clobbers | 
|---|
| 308 | * EFLAGS.ZF. | 
|---|
| 309 | * Note: Only the memory operand variant of VERW clears the CPU buffers. | 
|---|
| 310 | */ | 
|---|
| 311 | .macro __CLEAR_CPU_BUFFERS feature | 
|---|
| 312 | #ifdef CONFIG_X86_64 | 
|---|
| 313 | ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature | 
|---|
| 314 | #else | 
|---|
| 315 | /* | 
|---|
| 316 | * In 32bit mode, the memory operand must be a %cs reference. The data | 
|---|
| 317 | * segments may not be usable (vm86 mode), and the stack segment may not | 
|---|
| 318 | * be flat (ESPFIX32). | 
|---|
| 319 | */ | 
|---|
| 320 | ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature | 
|---|
| 321 | #endif | 
|---|
| 322 | .endm | 
|---|
| 323 |  | 
|---|
| 324 | #define CLEAR_CPU_BUFFERS \ | 
|---|
| 325 | __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF | 
|---|
| 326 |  | 
|---|
| 327 | #define VM_CLEAR_CPU_BUFFERS \ | 
|---|
| 328 | __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM | 
|---|
| 329 |  | 
|---|
| 330 | #ifdef CONFIG_X86_64	/* each macro patches in a call to clear_bhb_loop when its own feature bit is set */ | 
|---|
| 331 | .macro CLEAR_BRANCH_HISTORY | 
|---|
| 332 | ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP | 
|---|
| 333 | .endm | 
|---|
| 334 |  | 
|---|
| 335 | .macro CLEAR_BRANCH_HISTORY_VMEXIT | 
|---|
| 336 | ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_VMEXIT | 
|---|
| 337 | .endm | 
|---|
| 338 | #else	/* !CONFIG_X86_64: both names expand to nothing */ | 
|---|
| 339 | #define CLEAR_BRANCH_HISTORY | 
|---|
| 340 | #define CLEAR_BRANCH_HISTORY_VMEXIT | 
|---|
| 341 | #endif | 
|---|
| 342 |  | 
|---|
| 343 | #else /* __ASSEMBLER__ */ | 
|---|
| 344 |  | 
|---|
| 345 | #define ITS_THUNK_SIZE	64 | 
|---|
| 346 |  | 
|---|
| 347 | typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; | 
|---|
| 348 | typedef u8 its_thunk_t[ITS_THUNK_SIZE]; | 
|---|
| 349 | extern retpoline_thunk_t __x86_indirect_thunk_array[]; | 
|---|
| 350 | extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; | 
|---|
| 351 | extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; | 
|---|
| 352 | extern its_thunk_t	 __x86_indirect_its_thunk_array[]; | 
|---|
| 353 | /* Return thunk entry points. Each one is a real symbol when its mitigation is configured in and an empty static inline stub otherwise, so every name is declared exactly once in every configuration. */ | 
|---|
| 354 | #ifdef CONFIG_MITIGATION_RETHUNK | 
|---|
| 355 | extern void __x86_return_thunk(void); | 
|---|
| 356 | #else | 
|---|
| 357 | static inline void __x86_return_thunk(void) {} | 
|---|
| 358 | #endif | 
|---|
| 359 |  | 
|---|
| 360 | #ifdef CONFIG_MITIGATION_UNRET_ENTRY | 
|---|
| 361 | extern void retbleed_return_thunk(void); | 
|---|
| 362 | #else | 
|---|
| 363 | static inline void retbleed_return_thunk(void) {} | 
|---|
| 364 | #endif | 
|---|
| 365 |  | 
|---|
| 366 | extern void srso_alias_untrain_ret(void);	/* target of the CALL patched in by CALL_UNTRAIN_RET */ | 
|---|
| 367 |  | 
|---|
| 368 | #ifdef CONFIG_MITIGATION_SRSO | 
|---|
| 369 | extern void srso_return_thunk(void); | 
|---|
| 370 | extern void srso_alias_return_thunk(void); | 
|---|
| 371 | #else | 
|---|
| 372 | static inline void srso_return_thunk(void) {} | 
|---|
| 373 | static inline void srso_alias_return_thunk(void) {} | 
|---|
| 374 | #endif | 
|---|
| 375 |  | 
|---|
| 376 | #ifdef CONFIG_MITIGATION_ITS | 
|---|
| 377 | extern void its_return_thunk(void); | 
|---|
| 378 | #else | 
|---|
| 379 | static inline void its_return_thunk(void) {} | 
|---|
| 380 | #endif | 
|---|
| 381 |  | 
|---|
| 382 | /* | 
|---|
| 383 | * Helpers that the alternatives in this header patch in as | 
|---|
| 384 | * "call entry_untrain_ret" and "call write_ibpb". | 
|---|
| 385 | */ | 
|---|
| 386 | extern void entry_untrain_ret(void); | 
|---|
| 387 | extern void write_ibpb(void); | 
|---|
| 389 | #ifdef CONFIG_X86_64 | 
|---|
| 390 | extern void clear_bhb_loop(void); | 
|---|
| 391 | #endif | 
|---|
| 392 |  | 
|---|
| 393 | extern void (*x86_return_thunk)(void); | 
|---|
| 394 |  | 
|---|
| 395 | extern void __warn_thunk(void); | 
|---|
| 396 |  | 
|---|
| 397 | #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING | 
|---|
| 398 | extern void call_depth_return_thunk(void); | 
|---|
| 399 |  | 
|---|
| 400 | #define CALL_DEPTH_ACCOUNT					\ | 
|---|
| 401 | ALTERNATIVE("",						\ | 
|---|
| 402 | __stringify(INCREMENT_CALL_DEPTH),		\ | 
|---|
| 403 | X86_FEATURE_CALL_DEPTH) | 
|---|
| 404 |  | 
|---|
| 405 | DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth); | 
|---|
| 406 |  | 
|---|
| 407 | #ifdef CONFIG_CALL_THUNKS_DEBUG | 
|---|
| 408 | DECLARE_PER_CPU(u64, __x86_call_count); | 
|---|
| 409 | DECLARE_PER_CPU(u64, __x86_ret_count); | 
|---|
| 410 | DECLARE_PER_CPU(u64, __x86_stuffs_count); | 
|---|
| 411 | DECLARE_PER_CPU(u64, __x86_ctxsw_count); | 
|---|
| 412 | #endif | 
|---|
| 413 | #else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ | 
|---|
| 414 |  | 
|---|
| 415 | static inline void call_depth_return_thunk(void) {} | 
|---|
| 416 | #define CALL_DEPTH_ACCOUNT "" | 
|---|
| 417 |  | 
|---|
| 418 | #endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ | 
|---|
| 419 |  | 
|---|
| 420 | #ifdef CONFIG_MITIGATION_RETPOLINE | 
|---|
| 421 |  | 
|---|
| 422 | #define GEN(reg) \ | 
|---|
| 423 | extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; | 
|---|
| 424 | #include <asm/GEN-for-each-reg.h> | 
|---|
| 425 | #undef GEN | 
|---|
| 426 |  | 
|---|
| 427 | #define GEN(reg)						\ | 
|---|
| 428 | extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg; | 
|---|
| 429 | #include <asm/GEN-for-each-reg.h> | 
|---|
| 430 | #undef GEN | 
|---|
| 431 |  | 
|---|
| 432 | #define GEN(reg)						\ | 
|---|
| 433 | extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg; | 
|---|
| 434 | #include <asm/GEN-for-each-reg.h> | 
|---|
| 435 | #undef GEN | 
|---|
| 436 |  | 
|---|
| 437 | #ifdef CONFIG_X86_64 | 
|---|
| 438 |  | 
|---|
| 439 | /* | 
|---|
| 440 | * Emits a conditional CS prefix that is compatible with | 
|---|
| 441 | * -mindirect-branch-cs-prefix. | 
|---|
| 442 | */ | 
|---|
| 443 | #define __CS_PREFIX(reg)				\ | 
|---|
| 444 | ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n"	\ | 
|---|
| 445 | ".ifc \\rs," reg "\n"				\ | 
|---|
| 446 | ".byte 0x2e\n"					\ | 
|---|
| 447 | ".endif\n"					\ | 
|---|
| 448 | ".endr\n" | 
|---|
| 449 |  | 
|---|
| 450 | /* | 
|---|
| 451 | * Inline asm uses the %V modifier which is only in newer GCC | 
|---|
| 452 | * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. | 
|---|
| 453 | */ | 
|---|
| 454 | #define CALL_NOSPEC	__CS_PREFIX("%V[thunk_target]")	\ | 
|---|
| 455 | "call __x86_indirect_thunk_%V[thunk_target]\n" | 
|---|
| 456 |  | 
|---|
| 457 | # define THUNK_TARGET(addr) [thunk_target] "r" (addr) | 
|---|
| 458 |  | 
|---|
| 459 | #else /* CONFIG_X86_32 */ | 
|---|
| 460 | /* | 
|---|
| 461 | * For i386 we use the original ret-equivalent retpoline, because | 
|---|
| 462 | * otherwise we'll run out of registers. We don't care about CET | 
|---|
| 463 | * here, anyway. | 
|---|
| 464 | */ | 
|---|
| 465 | # define CALL_NOSPEC						\ | 
|---|
| 466 | ALTERNATIVE_2(						\ | 
|---|
| 467 | ANNOTATE_RETPOLINE_SAFE					\ | 
|---|
| 468 | "call *%[thunk_target]\n",				\ | 
|---|
| 469 | "       jmp    904f;\n"					\ | 
|---|
| 470 | "       .align 16\n"					\ | 
|---|
| 471 | "901:	call   903f;\n"					\ | 
|---|
| 472 | "902:	pause;\n"					\ | 
|---|
| 473 | "    	lfence;\n"					\ | 
|---|
| 474 | "       jmp    902b;\n"					\ | 
|---|
| 475 | "       .align 16\n"					\ | 
|---|
| 476 | "903:	lea    4(%%esp), %%esp;\n"			\ | 
|---|
| 477 | "       pushl  %[thunk_target];\n"			\ | 
|---|
| 478 | "       ret;\n"						\ | 
|---|
| 479 | "       .align 16\n"					\ | 
|---|
| 480 | "904:	call   901b;\n",				\ | 
|---|
| 481 | X86_FEATURE_RETPOLINE,					\ | 
|---|
| 482 | "lfence;\n"						\ | 
|---|
| 483 | ANNOTATE_RETPOLINE_SAFE					\ | 
|---|
| 484 | "call *%[thunk_target]\n",				\ | 
|---|
| 485 | X86_FEATURE_RETPOLINE_LFENCE) | 
|---|
| 486 |  | 
|---|
| 487 | # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) | 
|---|
| 488 | #endif | 
|---|
| 489 | #else /* No retpoline for C / inline asm */ | 
|---|
| 490 | # define CALL_NOSPEC "call *%[thunk_target]\n" | 
|---|
| 491 | # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) | 
|---|
| 492 | #endif | 
|---|
| 493 |  | 
|---|
| 494 | /* The Spectre V2 mitigation variants */ | 
|---|
| 495 | enum spectre_v2_mitigation { | 
|---|
| 496 | SPECTRE_V2_NONE, | 
|---|
| 497 | SPECTRE_V2_RETPOLINE, | 
|---|
| 498 | SPECTRE_V2_LFENCE, | 
|---|
| 499 | SPECTRE_V2_EIBRS, | 
|---|
| 500 | SPECTRE_V2_EIBRS_RETPOLINE, | 
|---|
| 501 | SPECTRE_V2_EIBRS_LFENCE, | 
|---|
| 502 | SPECTRE_V2_IBRS, | 
|---|
| 503 | }; | 
|---|
| 504 |  | 
|---|
| 505 | /* The indirect branch speculation control variants */ | 
|---|
| 506 | enum spectre_v2_user_mitigation { | 
|---|
| 507 | SPECTRE_V2_USER_NONE, | 
|---|
| 508 | SPECTRE_V2_USER_STRICT, | 
|---|
| 509 | SPECTRE_V2_USER_STRICT_PREFERRED, | 
|---|
| 510 | SPECTRE_V2_USER_PRCTL, | 
|---|
| 511 | SPECTRE_V2_USER_SECCOMP, | 
|---|
| 512 | }; | 
|---|
| 513 |  | 
|---|
| 514 | /* The Speculative Store Bypass disable variants */ | 
|---|
| 515 | enum ssb_mitigation { | 
|---|
| 516 | SPEC_STORE_BYPASS_NONE, | 
|---|
| 517 | SPEC_STORE_BYPASS_AUTO, | 
|---|
| 518 | SPEC_STORE_BYPASS_DISABLE, | 
|---|
| 519 | SPEC_STORE_BYPASS_PRCTL, | 
|---|
| 520 | SPEC_STORE_BYPASS_SECCOMP, | 
|---|
| 521 | }; | 
|---|
| 522 | /* Write @val to MSR @msr, but only on CPUs with @feature: the empty default is replaced by WRMSR through ALTERNATIVE(). @feature is passed with an "i" constraint, so it must be a compile-time constant. */ | 
|---|
| 523 | static __always_inline | 
|---|
| 524 | void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) | 
|---|
| 525 | { | 
|---|
| 526 | asm volatile(ALTERNATIVE( "", "wrmsr", %c[feature]) | 
|---|
| 527 | : : "c"(msr),	/* ECX: MSR index */ | 
|---|
| 528 | "a"((u32)val),	/* EAX: low 32 bits of the value */ | 
|---|
| 529 | "d"((u32)(val >> 32)),	/* EDX: high 32 bits of the value */ | 
|---|
| 530 | [feature] "i"(feature) | 
|---|
| 531 | : "memory"); | 
|---|
| 532 | } | 
|---|
| 533 |  | 
|---|
| 534 | DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user); | 
|---|
| 535 | /* Call write_ibpb() when X86_FEATURE_IBPB is set; otherwise the asm is empty. RAX, RCX and RDX are listed as clobbers because write_ibpb() overwrites AX, CX and DX. */ | 
|---|
| 536 | static inline void indirect_branch_prediction_barrier(void) | 
|---|
| 537 | { | 
|---|
| 538 | asm_inline volatile(ALTERNATIVE( "", "call write_ibpb", X86_FEATURE_IBPB) | 
|---|
| 539 | : ASM_CALL_CONSTRAINT | 
|---|
| 540 | :: "rax", "rcx", "rdx", "memory"); | 
|---|
| 541 | } | 
|---|
| 542 |  | 
|---|
| 543 | /* The Intel SPEC CTRL MSR base value cache */ | 
|---|
| 544 | extern u64 x86_spec_ctrl_base; | 
|---|
| 545 | DECLARE_PER_CPU(u64, x86_spec_ctrl_current); | 
|---|
| 546 | extern void update_spec_ctrl_cond(u64 val); | 
|---|
| 547 | extern u64 spec_ctrl_current(void); | 
|---|
| 548 |  | 
|---|
| 549 | /* | 
|---|
| 550 | * With retpoline, we must use IBRS to restrict branch prediction | 
|---|
| 551 | * before calling into firmware. | 
|---|
| 552 | * _start() disables preemption, then writes spec_ctrl_current() | SPEC_CTRL_IBRS to MSR_IA32_SPEC_CTRL if X86_FEATURE_USE_IBRS_FW is set and PRED_CMD_IBPB to MSR_IA32_PRED_CMD if X86_FEATURE_USE_IBPB_FW is set. Every _start() must be paired with an _end(), which re-enables preemption. | 
|---|
| 553 | * (Implemented as CPP macros due to header hell.) | 
|---|
| 554 | */ | 
|---|
| 555 | #define firmware_restrict_branch_speculation_start()			\ | 
|---|
| 556 | do {									\ | 
|---|
| 557 | preempt_disable();						\ | 
|---|
| 558 | alternative_msr_write(MSR_IA32_SPEC_CTRL,			\ | 
|---|
| 559 | spec_ctrl_current() | SPEC_CTRL_IBRS,	\ | 
|---|
| 560 | X86_FEATURE_USE_IBRS_FW);			\ | 
|---|
| 561 | alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,		\ | 
|---|
| 562 | X86_FEATURE_USE_IBPB_FW);			\ | 
|---|
| 563 | } while (0) | 
|---|
| 564 | /* Counterpart of _start(): writes spec_ctrl_current() back to MSR_IA32_SPEC_CTRL if X86_FEATURE_USE_IBRS_FW is set, then re-enables preemption. */ | 
|---|
| 565 | #define firmware_restrict_branch_speculation_end()			\ | 
|---|
| 566 | do {									\ | 
|---|
| 567 | alternative_msr_write(MSR_IA32_SPEC_CTRL,			\ | 
|---|
| 568 | spec_ctrl_current(),			\ | 
|---|
| 569 | X86_FEATURE_USE_IBRS_FW);			\ | 
|---|
| 570 | preempt_enable();						\ | 
|---|
| 571 | } while (0) | 
|---|
| 572 |  | 
|---|
| 573 | DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); | 
|---|
| 574 | DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); | 
|---|
| 575 | DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); | 
|---|
| 576 |  | 
|---|
| 577 | DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); | 
|---|
| 578 |  | 
|---|
| 579 | DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); | 
|---|
| 580 |  | 
|---|
| 581 | DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); | 
|---|
| 582 |  | 
|---|
| 583 | DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear); | 
|---|
| 584 |  | 
|---|
| 585 | extern u16 x86_verw_sel; | 
|---|
| 586 |  | 
|---|
| 587 | #include <asm/segment.h> | 
|---|
| 588 |  | 
|---|
| 589 | /** | 
|---|
| 590 | * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns | 
|---|
| 591 | * | 
|---|
| 592 | * This uses the otherwise unused and obsolete VERW instruction in | 
|---|
| 593 | * combination with microcode which triggers a CPU buffer flush when the | 
|---|
| 594 | * instruction is executed. | 
|---|
| 595 | */ | 
|---|
| 596 | static __always_inline void x86_clear_cpu_buffers(void) | 
|---|
| 597 | { | 
|---|
| 598 | static const u16 ds = __KERNEL_DS; | 
|---|
| 599 |  | 
|---|
| 600 | /* | 
|---|
| 601 | * Has to be the memory-operand variant because only that | 
|---|
| 602 | * guarantees the CPU buffer flush functionality according to | 
|---|
| 603 | * documentation. The register-operand variant does not. | 
|---|
| 604 | * Works with any segment selector, but a valid writable | 
|---|
| 605 | * data segment is the fastest variant. | 
|---|
| 606 | * | 
|---|
| 607 | * "cc" clobber is required because VERW modifies ZF. | 
|---|
| 608 | */ | 
|---|
| 609 | asm volatile( "verw %[ds]": : [ds] "m"(ds) : "cc"); | 
|---|
| 610 | } | 
|---|
| 611 |  | 
|---|
| 612 | /** | 
|---|
| 613 | * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS | 
|---|
| 614 | * and TSA vulnerabilities. | 
|---|
| 615 | * | 
|---|
| 616 | * Clear CPU buffers if the corresponding static key is enabled | 
|---|
| 617 | */ | 
|---|
| 618 | static __always_inline void x86_idle_clear_cpu_buffers(void) | 
|---|
| 619 | { | 
|---|
| 620 | if (static_branch_likely(&cpu_buf_idle_clear)) | 
|---|
| 621 | x86_clear_cpu_buffers(); | 
|---|
| 622 | } | 
|---|
| 623 |  | 
|---|
| 624 | #endif /* __ASSEMBLER__ */ | 
|---|
| 625 |  | 
|---|
| 626 | #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ | 
|---|
| 627 |  | 
|---|