/* SPDX-License-Identifier: GPL-2.0 */
/*
 *  linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
 *  Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
 *  Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/threads.h>
#include <linux/init.h>
#include <linux/pgtable.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
#include <asm/processor-flags.h>
#include <asm/percpu.h>
#include <asm/nops.h>
#include "../entry/calling.h"
#include <asm/nospec-branch.h>
#include <asm/apicdef.h>
#include <asm/fixmap.h>
#include <asm/smp.h>
#include <asm/thread_info.h>

/*
 * We are not able to switch in one step to the final KERNEL ADDRESS SPACE
 * because we need identity-mapped pages.
 */

	__INIT
	.code64
SYM_CODE_START_NOALIGN(startup_64)
	UNWIND_HINT_END_OF_STACK
	/*
	 * At this point the CPU runs in 64-bit mode with CS.L = 1 and
	 * CS.D = 0, and someone has loaded an identity mapped page table
	 * for us.  These identity mapped page tables map all of the
	 * kernel pages and possibly all of memory.
	 *
	 * %RSI holds the physical address of the boot_params structure
	 * provided by the bootloader. Preserve it in %R15 so C function calls
	 * will not clobber it.
	 *
	 * We come here either directly from a 64-bit bootloader, or from
	 * arch/x86/boot/compressed/head_64.S.
	 *
	 * We only come here initially at boot; nothing else comes here.
	 *
	 * Since we may be loaded at an address different from what we were
	 * compiled to run at, we first fix up the physical addresses in our
	 * page tables and then reload them.
	 */
	mov	%rsi, %r15

	/* Set up the stack for verify_cpu() */
	leaq	__top_init_kernel_stack(%rip), %rsp

	/*
	 * Set up GSBASE.
	 * Note that on SMP the boot CPU uses the init data section until
	 * the per-CPU areas are set up.
	 */
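	/*
	 * WRMSR writes %edx:%eax to the MSR selected by %ecx, so zeroing
	 * both halves below clears the full 64-bit GS base.
	 */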
	movl	$MSR_GS_BASE, %ecx
	xorl	%eax, %eax
	xorl	%edx, %edx
	wrmsr

	call	__pi_startup_64_setup_gdt_idt

	/* Now switch to __KERNEL_CS so IRET works reliably */
	pushq	$__KERNEL_CS
	leaq	.Lon_kernel_cs(%rip), %rax
	pushq	%rax
	lretq
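	/*
	 * The far return above pops the new RIP and then the new CS from
	 * the stack, so the two pushes switch to __KERNEL_CS and branch
	 * to .Lon_kernel_cs in a single instruction.
	 */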

.Lon_kernel_cs:
	ANNOTATE_NOENDBR
	UNWIND_HINT_END_OF_STACK

#ifdef CONFIG_AMD_MEM_ENCRYPT
	/*
	 * Activate SEV/SME memory encryption if supported/enabled. This needs to
	 * be done now, since this also includes setup of the SEV-SNP CPUID table,
	 * which needs to be done before any CPUID instructions are executed in
	 * subsequent code. Pass the boot_params pointer as the first argument.
	 */
	movq	%r15, %rdi
	call	__pi_sme_enable
#endif

	/* Sanitize CPU configuration */
	call	verify_cpu

	/*
	 * Derive the kernel's physical-to-virtual offset from the physical and
	 * virtual addresses of common_startup_64().
	 */
	leaq	common_startup_64(%rip), %rdi
	subq	.Lcommon_startup_64(%rip), %rdi
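	/*
	 * The RIP-relative LEA above yields the runtime (physical) address,
	 * while .Lcommon_startup_64 stores the link-time virtual address,
	 * so %rdi now carries the delta between the two.
	 */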

	/*
	 * Perform pagetable fixups. Additionally, if SME is active, encrypt
	 * the kernel and retrieve the modifier (SME encryption mask if SME
	 * is active) to be added to the initial pgdir entry that will be
	 * programmed into CR3.
	 */
	movq	%r15, %rsi
	call	__pi___startup_64

	/* Form the CR3 value being sure to include the CR3 modifier */
	leaq	early_top_pgt(%rip), %rcx
	addq	%rcx, %rax

#ifdef CONFIG_AMD_MEM_ENCRYPT
	mov	%rax, %rdi

	/*
	 * For SEV guests: Verify that the C-bit is correct. A malicious
	 * hypervisor could lie about the C-bit position to perform a ROP
	 * attack on the guest by writing to the unencrypted stack and
	 * waiting for the next RET instruction.
	 */
	call	sev_verify_cbit
#endif

	/*
	 * Switch to early_top_pgt which still has the identity mappings
	 * present.
	 */
	movq	%rax, %cr3

	/* Branch to the common startup code at its kernel virtual address */
	ANNOTATE_RETPOLINE_SAFE
	jmp	*.Lcommon_startup_64(%rip)
SYM_CODE_END(startup_64)

	__INITRODATA
SYM_DATA_LOCAL(.Lcommon_startup_64, .quad common_startup_64)

	.text
SYM_CODE_START(secondary_startup_64)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * At this point the CPU runs in 64-bit mode with CS.L = 1 and
	 * CS.D = 0, and someone has loaded a mapped page table.
	 *
	 * We come here either from startup_64 (using physical addresses)
	 * or from trampoline.S (using virtual addresses).
	 *
	 * Using virtual addresses from trampoline.S removes the need
	 * to have any identity mapped pages in the kernel page table
	 * after the boot processor executes this code.
	 */

	/* Sanitize CPU configuration */
	call	verify_cpu

	/*
	 * The secondary_startup_64_no_verify entry point is only used by
	 * SEV-ES guests. In those guests the call to verify_cpu() would cause
	 * #VC exceptions which cannot be handled at this stage of secondary
	 * CPU bringup.
	 *
	 * All non-SEV-ES systems, especially Intel systems, need to execute
	 * verify_cpu() above to make sure NX is enabled.
	 */
SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR

	/* Clear %R15 which holds the boot_params pointer on the boot CPU */
	xorl	%r15d, %r15d

	/* Derive the runtime physical address of init_top_pgt[] */
	movq	phys_base(%rip), %rax
	addq	$(init_top_pgt - __START_KERNEL_map), %rax
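	/*
	 * phys_base holds the runtime physical address that the virtual
	 * address __START_KERNEL_map maps to, so adding the symbol's
	 * offset from __START_KERNEL_map yields the physical address of
	 * init_top_pgt[].
	 */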

	/*
	 * Retrieve the modifier (SME encryption mask if SME is active) to be
	 * added to the initial pgdir entry that will be programmed into CR3.
	 */
#ifdef CONFIG_AMD_MEM_ENCRYPT
	addq	sme_me_mask(%rip), %rax
#endif
	/*
	 * Switch to the init_top_pgt here, away from the trampoline_pgd and
	 * unmap the identity mapped ranges.
	 */
	movq	%rax, %cr3

SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR

	/*
	 * Create a mask of CR4 bits to preserve. Omit PGE in order to flush
	 * global 1:1 translations from the TLBs.
	 *
	 * From the SDM:
	 * "If CR4.PGE is changing from 0 to 1, there were no global TLB
	 *  entries before the execution; if CR4.PGE is changing from 1 to 0,
	 *  there will be no global TLB entries after the execution."
	 */
	movl	$(X86_CR4_PAE | X86_CR4_LA57), %edx
#ifdef CONFIG_X86_MCE
	/*
	 * Preserve CR4.MCE if the kernel will enable #MC support.
	 * Clearing MCE may fault in some environments (that also force #MC
	 * support). Any machine check that occurs before #MC support is fully
	 * configured will crash the system regardless of the CR4.MCE value set
	 * here.
	 */
	orl	$X86_CR4_MCE, %edx
#endif
	movq	%cr4, %rcx
	andl	%edx, %ecx

	/* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */
	btsl	$X86_CR4_PSE_BIT, %ecx
	movq	%rcx, %cr4

	/*
	 * Set CR4.PGE to re-enable global translations.
	 */
	btsl	$X86_CR4_PGE_BIT, %ecx
	movq	%rcx, %cr4

#ifdef CONFIG_SMP
	/*
	 * For parallel boot, the APIC ID is read from the APIC, and then
	 * used to look up the CPU number.  For booting a single CPU, the
	 * CPU number is encoded in smpboot_control.
	 *
	 * Bit 31	STARTUP_READ_APICID (Read APICID from APIC)
	 * Bits 0-23	CPU# if STARTUP_xx flags are not set
	 */
	movl	smpboot_control(%rip), %ecx
	testl	$STARTUP_READ_APICID, %ecx
	jnz	.Lread_apicid
	/*
	 * No control bit set, single CPU bringup. CPU number is provided
	 * in bits 0-23. This is also the boot CPU case (CPU number 0).
	 */
	andl	$(~STARTUP_PARALLEL_MASK), %ecx
	jmp	.Lsetup_cpu

.Lread_apicid:
	/* Check whether X2APIC mode is already enabled */
	mov	$MSR_IA32_APICBASE, %ecx
	rdmsr
	testl	$X2APIC_ENABLE, %eax
	jnz	.Lread_apicid_msr

#ifdef CONFIG_X86_X2APIC
	/*
	 * If the system is in X2APIC mode then the MMIO base might not be
	 * mapped, causing the MMIO read below to fault. Faults can't be
	 * handled at that point.
	 */
	cmpl	$0, x2apic_mode(%rip)
	jz	.Lread_apicid_mmio

	/* Force the AP into X2APIC mode. */
	orl	$X2APIC_ENABLE, %eax
	wrmsr
	jmp	.Lread_apicid_msr
#endif

.Lread_apicid_mmio:
	/* Read the APIC ID from the fix-mapped MMIO space. */
	movq	apic_mmio_base(%rip), %rcx
	addq	$APIC_ID, %rcx
	movl	(%rcx), %eax
	shr	$24, %eax
	jmp	.Llookup_AP

.Lread_apicid_msr:
	mov	$APIC_X2APIC_ID_MSR, %ecx
	rdmsr

.Llookup_AP:
	/* EAX contains the APIC ID of the current CPU */
	xorl	%ecx, %ecx
	leaq	cpuid_to_apicid(%rip), %rbx

.Lfind_cpunr:
	cmpl	(%rbx,%rcx,4), %eax
	jz	.Lsetup_cpu
	inc	%ecx
#ifdef CONFIG_FORCE_NR_CPUS
	cmpl	$NR_CPUS, %ecx
#else
	cmpl	nr_cpu_ids(%rip), %ecx
#endif
	jb	.Lfind_cpunr
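	/*
	 * The loop above scans cpuid_to_apicid[], an array of 32-bit APIC
	 * IDs, using scaled-index addressing (4 bytes per entry). On a
	 * match, %ecx holds the CPU number used below to index
	 * __per_cpu_offset[].
	 */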

	/*  APIC ID not found in the table. Drop the trampoline lock and bail. */
	movq	trampoline_lock(%rip), %rax
	movl	$0, (%rax)

1:	cli
	hlt
	jmp	1b

.Lsetup_cpu:
	/* Get the per-CPU offset for the given CPU# which is in ECX */
	movq	__per_cpu_offset(,%rcx,8), %rdx
#else
	xorl	%edx, %edx /* zero-extended to clear all of RDX */
#endif /* CONFIG_SMP */

	/*
	 * Set up a boot-time stack - any secondary CPU will have lost its
	 * stack by now because the CR3 switch above unmaps the real-mode
	 * stack.
	 *
	 * RDX contains the per-cpu offset
	 */
	movq	current_task(%rdx), %rax
	movq	TASK_threadsp(%rax), %rsp

	/*
	 * Now that this CPU is running on its own stack, drop the realmode
	 * protection. For the boot CPU the pointer is NULL!
	 */
	movq	trampoline_lock(%rip), %rax
	testq	%rax, %rax
	jz	.Lsetup_gdt
	movl	$0, (%rax)

.Lsetup_gdt:
	/*
	 * We must switch to a new descriptor in kernel space for the GDT
	 * because soon the kernel won't have access anymore to the userspace
	 * addresses we're currently running at. We have to do that here
	 * because in 32-bit mode we couldn't load a 64-bit linear address.
	 */
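	/*
	 * LGDT takes a 10-byte pseudo-descriptor: a 16-bit limit followed
	 * by a 64-bit linear base address. Build it in a 16-byte scratch
	 * area on the stack.
	 */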
	subq	$16, %rsp
	movw	$(GDT_SIZE-1), (%rsp)
	leaq	gdt_page(%rdx), %rax
	movq	%rax, 2(%rsp)
	lgdt	(%rsp)
	addq	$16, %rsp

	/* set up data segments */
	xorl	%eax,%eax
	movl	%eax,%ds
	movl	%eax,%ss
	movl	%eax,%es

	/*
	 * We don't really need to load %fs or %gs, but load them anyway
	 * to kill any stale realmode selectors.  This allows execution
	 * under VT hardware.
	 */
	movl	%eax,%fs
	movl	%eax,%gs

	/*
	 * Set up GSBASE.
	 * Note that, on SMP, the boot CPU uses the init data section until
	 * the per-CPU areas are set up.
	 */
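	/*
	 * %rdx still holds the per-CPU offset; the MOVL/SHRQ pair below
	 * splits it into the %edx:%eax halves that WRMSR expects.
	 */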
	movl	$MSR_GS_BASE,%ecx
	movl	%edx, %eax
	shrq	$32, %rdx
	wrmsr

	/* Setup and Load IDT */
	call	early_setup_idt

	/* Check if NX is implemented */
	movl	$0x80000001, %eax
	cpuid
	movl	%edx,%edi
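	/*
	 * CPUID leaf 0x80000001 returns the extended feature flags in
	 * %edx; bit 20, tested below, indicates No-Execute support.
	 */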

	/* Setup EFER (Extended Feature Enable Register) */
	movl	$MSR_EFER, %ecx
	rdmsr
	/*
	 * Preserve the current value of EFER for comparison and to skip
	 * EFER writes if no change was made (for TDX guest)
	 */
	movl	%eax, %edx
	btsl	$_EFER_SCE, %eax	/* Enable System Call */
	btl	$20,%edi		/* No Execute supported? */
	jnc	1f
	btsl	$_EFER_NX, %eax
	btsq	$_PAGE_BIT_NX,early_pmd_flags(%rip)

	/* Avoid writing EFER if no change was made (for TDX guest) */
1:	cmpl	%edx, %eax
	je	1f
	xor	%edx, %edx
	wrmsr				/* Make changes effective */
1:
	/* Setup cr0 */
	movl	$CR0_STATE, %eax
	/* Make changes effective */
	movq	%rax, %cr0

	/* zero EFLAGS after setting rsp */
	pushq	$0
	popfq

	/* Pass the boot_params pointer as first argument */
	movq	%r15, %rdi

.Ljump_to_C_code:
	xorl	%ebp, %ebp	# clear frame pointer
	ANNOTATE_RETPOLINE_SAFE
	callq	*initial_code(%rip)
	ud2
SYM_CODE_END(secondary_startup_64)

#include "verify_cpu.S"
#include "sev_verify_cbit.S"

#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT)
/*
 * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for
 * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot
 * unplug. Everything is set up already except the stack.
 */
SYM_CODE_START(soft_restart_cpu)
	ANNOTATE_NOENDBR
	UNWIND_HINT_END_OF_STACK

	/* Find the idle task stack */
	movq	PER_CPU_VAR(current_task), %rcx
	movq	TASK_threadsp(%rcx), %rsp

	jmp	.Ljump_to_C_code
SYM_CODE_END(soft_restart_cpu)
#endif

#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
 * VC Exception handler used during early boot when running on kernel
 * addresses, but before the switch to the idt_table can be made.
 * The early_idt_handler_array can't be used here because it calls into a lot
 * of __init code and this handler is also used during CPU offlining/onlining.
 * Therefore this handler ends up in the .text section so that it stays around
 * when .init.text is freed.
 */
SYM_CODE_START_NOALIGN(vc_boot_ghcb)
	UNWIND_HINT_IRET_REGS offset=8
	ENDBR

	/* Build pt_regs */
	PUSH_AND_CLEAR_REGS
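	/*
	 * The #VC error code pushed by the CPU ends up in the orig_ax
	 * slot of the pt_regs frame built above and is passed to the C
	 * handler as the second argument below.
	 */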

	/* Call C handler */
	movq	%rsp, %rdi
	movq	ORIG_RAX(%rsp), %rsi
	movq	initial_vc_handler(%rip), %rax
	ANNOTATE_RETPOLINE_SAFE
	call	*%rax

	/* Unwind pt_regs */
	POP_REGS

	/* Remove Error Code */
	addq	$8, %rsp

	iretq
SYM_CODE_END(vc_boot_ghcb)
#endif

	/* Both SMP bootup and ACPI suspend change these variables */
	__REFDATA
	.balign	8
SYM_DATA(initial_code,	.quad x86_64_start_kernel)
#ifdef CONFIG_AMD_MEM_ENCRYPT
SYM_DATA(initial_vc_handler,	.quad handle_vc_boot_ghcb)
#endif

SYM_DATA(trampoline_lock, .quad 0);
	__FINITDATA

	__INIT
SYM_CODE_START(early_idt_handler_array)
	i = 0
	.rept NUM_EXCEPTION_VECTORS
	.if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
		UNWIND_HINT_IRET_REGS
		ENDBR
		pushq $0	# Dummy error code, to make stack frame uniform
	.else
		UNWIND_HINT_IRET_REGS offset=8
		ENDBR
	.endif
	pushq $i		# 72(%rsp) Vector number
	jmp early_idt_handler_common
	UNWIND_HINT_IRET_REGS
	i = i + 1
	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
	.endr
SYM_CODE_END(early_idt_handler_array)
	ANNOTATE_NOENDBR // early_idt_handler_array[NUM_EXCEPTION_VECTORS]
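/*
 * Each stub above is padded with 0xcc to EARLY_IDT_HANDLER_SIZE bytes,
 * so the IDT setup code can find the stub for vector i at
 * early_idt_handler_array + i * EARLY_IDT_HANDLER_SIZE.
 */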

SYM_CODE_START_LOCAL(early_idt_handler_common)
	UNWIND_HINT_IRET_REGS offset=16
	/*
	 * The stack is the hardware frame, an error code or zero, and the
	 * vector number.
	 */
	cld

	incl early_recursion_flag(%rip)

	/* The vector number is currently in the pt_regs->di slot. */
	pushq %rsi				/* pt_regs->si */
	movq 8(%rsp), %rsi			/* RSI = vector number */
	movq %rdi, 8(%rsp)			/* pt_regs->di = RDI */
	pushq %rdx				/* pt_regs->dx */
	pushq %rcx				/* pt_regs->cx */
	pushq %rax				/* pt_regs->ax */
	pushq %r8				/* pt_regs->r8 */
	pushq %r9				/* pt_regs->r9 */
	pushq %r10				/* pt_regs->r10 */
	pushq %r11				/* pt_regs->r11 */
	pushq %rbx				/* pt_regs->bx */
	pushq %rbp				/* pt_regs->bp */
	pushq %r12				/* pt_regs->r12 */
	pushq %r13				/* pt_regs->r13 */
	pushq %r14				/* pt_regs->r14 */
	pushq %r15				/* pt_regs->r15 */
	UNWIND_HINT_REGS

	movq %rsp,%rdi		/* RDI = pt_regs; RSI is already trapnr */
	call do_early_exception

	decl early_recursion_flag(%rip)
	jmp restore_regs_and_return_to_kernel
SYM_CODE_END(early_idt_handler_common)

#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
 * VC Exception handler used during very early boot. The
 * early_idt_handler_array can't be used because it returns via the
 * paravirtualized INTERRUPT_RETURN and pv-ops don't work that early.
 *
 * XXX it does, fix this.
 *
 * This handler will end up in the .init.text section and not be
 * available to boot secondary CPUs.
 */
SYM_CODE_START_NOALIGN(vc_no_ghcb)
	UNWIND_HINT_IRET_REGS offset=8
	ENDBR

	/* Build pt_regs */
	PUSH_AND_CLEAR_REGS

	/* Call C handler */
	movq	%rsp, %rdi
	movq	ORIG_RAX(%rsp), %rsi
	call	__pi_do_vc_no_ghcb

	/* Unwind pt_regs */
	POP_REGS

	/* Remove Error Code */
	addq	$8, %rsp

	/* Pure iret required here - don't use INTERRUPT_RETURN */
	iretq
SYM_CODE_END(vc_no_ghcb)
SYM_PIC_ALIAS(vc_no_ghcb);
#endif

#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
 * Each PGD needs to be 8k long and 8k aligned.  We do not
 * ever go out to userspace with these, so we do not
 * strictly *need* the second page, but this allows us to
 * have a single set_pgd() implementation that does not
 * need to worry about whether it has 4k or 8k to work
 * with.
 *
 * This ensures PGDs are 8k long:
 */
#define PTI_USER_PGD_FILL	512
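/* 512 quadwords x 8 bytes = 4096 bytes, i.e. the second (user-half) page. */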
/* This ensures they are 8k-aligned: */
#define SYM_DATA_START_PTI_ALIGNED(name) \
	SYM_START(name, SYM_L_GLOBAL, .balign 2 * PAGE_SIZE)
#else
#define SYM_DATA_START_PTI_ALIGNED(name) \
	SYM_DATA_START_PAGE_ALIGNED(name)
#define PTI_USER_PGD_FILL	0
#endif

	__INITDATA
	.balign 4

SYM_DATA_START_PTI_ALIGNED(early_top_pgt)
	.fill	511,8,0
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
	.fill	PTI_USER_PGD_FILL,8,0
SYM_DATA_END(early_top_pgt)
SYM_PIC_ALIAS(early_top_pgt)

SYM_DATA_START_PAGE_ALIGNED(early_dynamic_pgts)
	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0
SYM_DATA_END(early_dynamic_pgts)
SYM_PIC_ALIAS(early_dynamic_pgts);

SYM_DATA(early_recursion_flag, .long 0)

	.data

#if defined(CONFIG_XEN_PV) || defined(CONFIG_PVH)
SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org    init_top_pgt + L4_PAGE_OFFSET*8, 0
	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org    init_top_pgt + L4_START_KERNEL*8, 0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
	.fill	PTI_USER_PGD_FILL,8,0
SYM_DATA_END(init_top_pgt)

SYM_DATA_START_PAGE_ALIGNED(level3_ident_pgt)
	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.fill	511, 8, 0
SYM_DATA_END(level3_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(level2_ident_pgt)
	/*
	 * Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 *
	 * Note: This sets _PAGE_GLOBAL regardless of whether the CPU
	 * supports it or it is enabled. But, the CPU should ignore the
	 * bit.
	 */
	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
SYM_DATA_END(level2_ident_pgt)
#else
SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
	.fill	512,8,0
	.fill	PTI_USER_PGD_FILL,8,0
SYM_DATA_END(init_top_pgt)
#endif

SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt)
	.fill	511,8,0
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(level4_kernel_pgt)
SYM_PIC_ALIAS(level4_kernel_pgt)

SYM_DATA_START_PAGE_ALIGNED(level3_kernel_pgt)
	.fill	L3_START_KERNEL,8,0
	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(level3_kernel_pgt)
SYM_PIC_ALIAS(level3_kernel_pgt)

SYM_DATA_START_PAGE_ALIGNED(level2_kernel_pgt)
	/*
	 * Kernel high mapping.
	 *
	 * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
	 * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
	 * 512 MiB otherwise.
	 *
	 * (NOTE: after that starts the module area, see MODULES_VADDR.)
	 *
	 * This table is eventually used by the kernel during normal runtime.
	 * Care must be taken to clear out undesired bits later, like _PAGE_RW
	 * or _PAGE_GLOBAL in some cases.
	 */
	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE)
SYM_DATA_END(level2_kernel_pgt)
SYM_PIC_ALIAS(level2_kernel_pgt)
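/*
 * The PMDS() line above emits KERNEL_IMAGE_SIZE/PMD_SIZE 2 MiB entries:
 * 512 when the image area is 1 GiB (RANDOMIZE_BASE), 256 when it is
 * 512 MiB.
 */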

SYM_DATA_START_PAGE_ALIGNED(level2_fixmap_pgt)
	.fill	(512 - 4 - FIXMAP_PMD_NUM),8,0
	pgtno = 0
	.rept (FIXMAP_PMD_NUM)
	.quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \
		+ _PAGE_TABLE_NOENC;
	pgtno = pgtno + 1
	.endr
	/* 6 MB reserved space + a 2MB hole */
	.fill	4,8,0
SYM_DATA_END(level2_fixmap_pgt)
SYM_PIC_ALIAS(level2_fixmap_pgt)

SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt)
	.rept (FIXMAP_PMD_NUM)
	.fill	512,8,0
	.endr
SYM_DATA_END(level1_fixmap_pgt)

	.data
	.align 16

SYM_DATA(smpboot_control,		.long 0)

	.align 16
/* This must match the first entry in level2_kernel_pgt */
SYM_DATA(phys_base, .quad 0x0)
SYM_PIC_ALIAS(phys_base);
EXPORT_SYMBOL(phys_base)

#include "../xen/xen-head.S"

	__PAGE_ALIGNED_BSS
SYM_DATA_START_PAGE_ALIGNED(empty_zero_page)
	.skip	PAGE_SIZE
SYM_DATA_END(empty_zero_page)
EXPORT_SYMBOL(empty_zero_page)
|---|