/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function; in particular,
 * there must be a plain RET and not a jump to the return thunk.
 */

#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
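/*
 * PTR(x) is the byte offset of entry x in a table of 8-byte entries;
 * PAGE_ATTR is the set of PTE flag bits (present, writable, accessed,
 * dirty), presumably for kexec's identity-mapped page tables.
 */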

/*
 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */

	.section .data..relocate_kernel,"a";
/* Minimal CPU state */
SYM_DATA_LOCAL(saved_rsp, .quad 0)
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)
	/* other data */
SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
SYM_DATA(kexec_debug_8250_mmio32, .quad 0)
SYM_DATA(kexec_debug_8250_port, .word 0)
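/*
 * The exported symbols above are filled in by the kexec C code before
 * relocate_kernel() runs; since this section is copied into the control
 * page, the %rip-relative accesses below read the control page's copy.
 */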

	.balign 16
SYM_DATA_START_LOCAL(kexec_debug_gdt)
	.word kexec_debug_gdt_end - kexec_debug_gdt - 1
	.long 0
	.word 0
	.quad 0x00cf9a000000ffff /* __KERNEL32_CS */
	.quad 0x00af9a000000ffff /* __KERNEL_CS */
	.quad 0x00cf92000000ffff /* __KERNEL_DS */
SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)
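/*
 * The first 8 bytes above double as the mandatory null descriptor, with
 * the GDT limit stashed in their first word; identity_mapped reads that
 * word back with "pushw (%rax)" when building a GDTR on the stack.
 */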

	.balign 8
SYM_DATA_START(kexec_debug_idt)
	.skip 0x100, 0x00
SYM_DATA_END(kexec_debug_idt)

	.section .text..relocate_kernel,"ax";
	.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi pa_control_page
	 * %rdx start address
	 * %rcx flags: RELOC_KERNEL_*
	 */

	/* Save the CPU context, used for jumping back */
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	pushf

	/* Invalidate GDT/IDT, zero out flags */
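	/*
	 * The two zero quads below form a descriptor-table pointer with
	 * base 0 and limit 0, so any later GDT/IDT access would fault;
	 * lidt/lgdt read 10 of the 16 zero bytes, addq discards the rest
	 * of the first quad, and popfq loads all-zero flags from the second.
	 */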
	pushq $0
	pushq $0

	lidt (%rsp)
	lgdt (%rsp)
	addq $8, %rsp
	popfq

	/* Switch to the identity mapped page tables */
	movq %cr3, %rax
	movq kexec_pa_table_page(%rip), %r9
	movq %r9, %cr3

	/* Leave CR4 in %r13 to enable the right paging mode later. */
	movq %cr4, %r13

	/* Disable global pages immediately to ensure this mapping is RWX */
	movq %r13, %r12
	andq $~(X86_CR4_PGE), %r12
	movq %r12, %cr4

	/* Save %rsp and CRs. */
	movq %r13, saved_cr4(%rip)
	movq %rsp, saved_rsp(%rip)
	movq %rax, saved_cr3(%rip)
	movq %cr0, %rax
	movq %rax, saved_cr0(%rip)

	/* save indirection list for jumping back */
	movq %rdi, pa_backup_pages_map(%rip)

	/* Save the flags to %r11 as swap_pages clobbers %rcx. */
	movq %rcx, %r11

	/* Set up a new stack at the end of the physical control page */
	lea PAGE_SIZE(%rsi), %rsp

	/* jump to identity mapped page */
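	/*
	 * 0b below is the runtime address of the local label, so
	 * (identity_mapped - 0b) - (__relocate_kernel_start - 0b) is just
	 * identity_mapped's offset within the relocated blob; adding it
	 * to %rsi (the physical control page) yields its identity-mapped
	 * address.
	 */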
0:	addq $identity_mapped - 0b, %rsi
	subq $__relocate_kernel_start - 0b, %rsi
	ANNOTATE_RETPOLINE_SAFE
	jmp *%rsi
SYM_CODE_END(relocate_kernel)

SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %rdx start address
	 * %r9 page table page
	 * %r11 flags: RELOC_KERNEL_*
	 * %r13 original CR4 when relocate_kernel() was invoked
	 */

	/* store the start address on the stack */
	pushq %rdx

	/* Create a GDTR (16-bit limit, 64-bit base) on the stack */
	leaq kexec_debug_gdt(%rip), %rax
	pushq %rax
	pushw (%rax)

	/* Load the GDT, put the stack back */
	lgdt (%rsp)
	addq $10, %rsp
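	/*
	 * pushq placed the 8-byte base on the stack and pushw placed the
	 * 2-byte limit just below it: exactly the 10-byte operand lgdt
	 * expects, which "addq $10" then discards.
	 */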

	/* Test that we can load segments */
	movq %ds, %rax
	movq %rax, %ds

	/* Now an IDTR on the stack to load the IDT the kernel created */
	leaq kexec_debug_idt(%rip), %rsi
	pushq %rsi
	pushw $0xff
	lidt (%rsp)
	addq $10, %rsp
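	/*
	 * Limit 0xff covers 16 gate descriptors of 16 bytes each,
	 * matching the 0x100 bytes reserved for kexec_debug_idt above.
	 */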

	//int3

	/*
	 * Clear X86_CR4_CET (if it was set) so that CR0.WP can be
	 * cleared below.
	 */
	movq %cr4, %rax
	andq $~(X86_CR4_CET), %rax
	movq %rax, %cr4

	/*
	 * Set cr0 to a known state:
	 * - Paging enabled
	 * - Alignment check disabled
	 * - Write protect disabled
	 * - No task switch
	 * - Don't do FP software emulation.
	 * - Protected mode enabled
	 */
	movq %cr0, %rax
	andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl $(X86_CR0_PG | X86_CR0_PE), %eax
	movq %rax, %cr0

	/*
	 * Set cr4 to a known state:
	 * - physical address extension enabled
	 * - 5-level paging, if it was enabled before
	 * - Machine check exception on TDX guest, if it was enabled before.
	 *   Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use %r13, which contains the original CR4 value read in
	 * relocate_kernel(). PAE is always set in the original CR4.
	 */
	andl $(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq %r13, %cr4

	/* Flush the TLB (needed?) */
	movq %r9, %cr3
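	/*
	 * Answering the "(needed?)" above: CR4.PGE was cleared back in
	 * relocate_kernel(), so no TLB entries are global and this CR3
	 * reload is sufficient to flush any stale ones.
	 */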

	/*
	 * If the memory cache is in an incoherent state, e.g. due to
	 * memory encryption, do WBINVD to flush the cache.
	 *
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 *
	 * Note SME sets this flag when the platform supports SME, so the
	 * WBINVD is performed even if SME is not activated by the kernel.
	 * This does no harm.
	 */
	testb $RELOC_KERNEL_CACHE_INCOHERENT, %r11b
	jz .Lnowbinvd
	wbinvd
.Lnowbinvd:

	call swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code,
	 * a serializing instruction must be executed here. So flush the
	 * TLB by reloading %cr3; it's handy and not processor dependent.
	 */
	movq %cr3, %rax
	movq %rax, %cr3

	testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b
	jnz .Lrelocate

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */

	xorl %eax, %eax
	xorl %ebx, %ebx
	xorl %ecx, %ecx
	xorl %edx, %edx
	xorl %esi, %esi
	xorl %edi, %edi
	xorl %ebp, %ebp
	xorl %r8d, %r8d
	xorl %r9d, %r9d
	xorl %r10d, %r10d
	xorl %r11d, %r11d
	xorl %r12d, %r12d
	xorl %r13d, %r13d
	xorl %r14d, %r14d
	xorl %r15d, %r15d

	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lrelocate:
	popq %rdx

	/* Use the swap page for the callee's stack */
	movq kexec_pa_swap_page(%rip), %r10
	leaq PAGE_SIZE(%r10), %rsp

	/* push the existing entry point onto the callee's stack */
	pushq %rdx

	ANNOTATE_RETPOLINE_SAFE
	call *%rdx

	/* get the re-entry point of the peer system */
	popq %rbp
	movq kexec_pa_swap_page(%rip), %r10
	movq pa_backup_pages_map(%rip), %rdi
	movq kexec_pa_table_page(%rip), %rax
	movq %rax, %cr3

	/* Find start (and end) of this physical mapping of control page */
	leaq (%rip), %r8
	ANNOTATE_NOENDBR
	andq $PAGE_MASK, %r8
	lea PAGE_SIZE(%r8), %rsp
	/*
	 * Ensure the RELOC_KERNEL_PRESERVE_CONTEXT flag is set so that
	 * swap_pages() can swap pages correctly. Note all other
	 * RELOC_KERNEL_* flags passed to relocate_kernel() are not
	 * restored.
	 */
	movl $RELOC_KERNEL_PRESERVE_CONTEXT, %r11d
	call swap_pages
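	/*
	 * Same offset trick as in relocate_kernel(): add virtual_mapped's
	 * offset within the blob to the control page's virtual address,
	 * then "return" to the result.
	 */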
	movq kexec_va_control_page(%rip), %rax
0:	addq $virtual_mapped - 0b, %rax
	subq $__relocate_kernel_start - 0b, %rax
	pushq %rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)

SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq saved_rsp(%rip), %rsp
	movq saved_cr4(%rip), %rax
	movq %rax, %cr4
	movq saved_cr3(%rip), %rax
	movq saved_cr0(%rip), %r8
	movq %rax, %cr3
	movq %r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq $saved_context, %rax
	lgdt saved_context_gdt_desc(%rax)
#endif

	/* relocate_kernel() returns the re-entry point for next time */
	movq %rbp, %rax

	popf
	popq %r15
	popq %r14
	popq %r13
	popq %r12
	popq %rbp
	popq %rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)

	/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %r11 flags: RELOC_KERNEL_*
	 */
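	/*
	 * Each 8-byte entry in the indirection page is a physical page
	 * address with type flags in its low bits: 0x1 destination,
	 * 0x2 next indirection page, 0x4 done, 0x8 source (the IND_*
	 * flags from <linux/kexec.h>), hence the ~0xfff masks below.
	 */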
	movq %rdi, %rcx /* Put the indirection_page in %rcx */
	xorl %edi, %edi
	xorl %esi, %esi
	jmp .Lstart /* Should start with an indirection record */

.Lloop: /* top, read another word from the indirection page */

	movq (%rbx), %rcx
	addq $8, %rbx
.Lstart:
	testb $0x1, %cl /* is it a destination page? */
	jz .Lnotdest
	movq %rcx, %rdi
	andq $0xfffffffffffff000, %rdi
	jmp .Lloop
.Lnotdest:
	testb $0x2, %cl /* is it an indirection page? */
	jz .Lnotind
	movq %rcx, %rbx
	andq $0xfffffffffffff000, %rbx
	jmp .Lloop
.Lnotind:
	testb $0x4, %cl /* is it the done indicator? */
	jz .Lnotdone
	jmp .Ldone
.Lnotdone:
	testb $0x8, %cl /* is it the source indicator? */
	jz .Lloop /* Ignore it otherwise */
	movq %rcx, %rsi /* For every source page do a copy */
	andq $0xfffffffffffff000, %rsi

	movq %rdi, %rdx /* Save destination page to %rdx */
	movq %rsi, %rax /* Save source page to %rax */

	/* Only actually swap for ::preserve_context */
	testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b
	jz .Lnoswap
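	/*
	 * Three-way swap via the swap page: afterwards the destination
	 * holds the source's contents and the source holds the
	 * destination's old contents, so the original kernel's memory
	 * survives for the jump back.
	 */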

	/* copy source page to swap page */
	movq kexec_pa_swap_page(%rip), %rdi
	movl $512, %ecx
	rep movsq

	/* copy destination page to source page */
	movq %rax, %rdi
	movq %rdx, %rsi
	movl $512, %ecx
	rep movsq

	/* copy swap page to destination page */
	movq %rdx, %rdi
	movq kexec_pa_swap_page(%rip), %rsi
.Lnoswap:
	movl $512, %ecx
	rep movsq

	lea PAGE_SIZE(%rax), %rsi
	jmp .Lloop
.Ldone:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)

/*
 * Generic 'print character' routine
 * - %al: Character to be printed (may clobber %rax)
 * - %rdx: MMIO address or port.
 */
#define XMTRDY 0x20

#define TXR 0 /* Transmit register (WRITE) */
#define LSR 5 /* Line Status */
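/*
 * Standard 8250 UART layout: XMTRDY is the transmitter-holding-register-
 * empty bit in the Line Status Register; a character may be written to
 * the transmit register only once that bit is set.
 */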

SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	addw $LSR, %dx
	xchg %al, %ah
.Lxmtrdy_loop:
	inb %dx, %al
	testb $XMTRDY, %al
	jnz .Lready
	pause
	jmp .Lxmtrdy_loop

.Lready:
	subw $LSR, %dx
	xchg %al, %ah
	outb %al, %dx
pr_char_null:
	ANNOTATE_NOENDBR

	ANNOTATE_UNRET_SAFE
	ret
SYM_CODE_END(pr_char_8250)

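/*
 * MMIO variant for UARTs with 32-bit register stride: registers sit four
 * bytes apart (hence LSR*4), and byte loads/stores replace port I/O.
 */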
SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
.Lxmtrdy_loop_mmio:
	movb (LSR*4)(%rdx), %ah
	testb $XMTRDY, %ah
	jnz .Lready_mmio
	pause
	jmp .Lxmtrdy_loop_mmio

.Lready_mmio:
	movb %al, (%rdx)
	ANNOTATE_UNRET_SAFE
	ret
SYM_CODE_END(pr_char_8250_mmio32)

/*
 * Load pr_char function pointer into %rsi and load %rdx with whatever
 * that function wants to see there (typically port/MMIO address).
 */
.macro pr_setup
	leaq pr_char_8250(%rip), %rsi
	movw kexec_debug_8250_port(%rip), %dx
	testw %dx, %dx
	jnz 1f

	leaq pr_char_8250_mmio32(%rip), %rsi
	movq kexec_debug_8250_mmio32(%rip), %rdx
	testq %rdx, %rdx
	jnz 1f

	leaq pr_char_null(%rip), %rsi
1:
.endm
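/* Preference order: I/O port if configured, then MMIO, else discard. */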

/* Print the nybble in %bl, clobber %rax */
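/*
 * '0' + nybble yields '0'-'9' directly; values 0xa-0xf overshoot into
 * ':'-'?' and are shifted up into 'a'-'f'. Tail-jumps to the pr_char
 * routine in %rsi.
 */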
SYM_CODE_START_LOCAL_NOALIGN(pr_nybble)
	UNWIND_HINT_FUNC
	movb %bl, %al
	andb $0x0f, %al
	addb $0x30, %al
	cmpb $0x3a, %al
	jb 1f
	addb $('a' - '0' - 10), %al
	ANNOTATE_RETPOLINE_SAFE
1:	jmp *%rsi
SYM_CODE_END(pr_nybble)

SYM_CODE_START_LOCAL_NOALIGN(pr_qword)
	UNWIND_HINT_FUNC
	movq $16, %rcx
1:	rolq $4, %rbx
	call pr_nybble
	loop 1b
	movb $'\n', %al
	ANNOTATE_RETPOLINE_SAFE
	jmp *%rsi
SYM_CODE_END(pr_qword)

.macro print_reg a, b, c, d, r
	movb $\a, %al
	ANNOTATE_RETPOLINE_SAFE
	call *%rsi
	movb $\b, %al
	ANNOTATE_RETPOLINE_SAFE
	call *%rsi
	movb $\c, %al
	ANNOTATE_RETPOLINE_SAFE
	call *%rsi
	movb $\d, %al
	ANNOTATE_RETPOLINE_SAFE
	call *%rsi
	movq \r, %rbx
	call pr_qword
.endm
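/* print_reg emits a four-character tag, then the value as 16 hex digits. */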

SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors)
	/* Each of these is 6 bytes. */
.macro vec_err exc
	UNWIND_HINT_ENTRY
	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
	nop
	nop
	pushq $\exc
	jmp exc_handler
.endm

.macro vec_noerr exc
	UNWIND_HINT_ENTRY
	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
	pushq $0
	pushq $\exc
	jmp exc_handler
.endm
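/*
 * For vectors where the CPU pushes an error code itself, vec_err pads
 * with two nops in place of the two-byte "pushq $0", so every entry
 * stays KEXEC_DEBUG_EXC_HANDLER_SIZE bytes and the IDT gates can be
 * generated at a fixed stride.
 */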

	ANNOTATE_NOENDBR
	vec_noerr 0 // #DE
	vec_noerr 1 // #DB
	vec_noerr 2 // #NMI
	vec_noerr 3 // #BP
	vec_noerr 4 // #OF
	vec_noerr 5 // #BR
	vec_noerr 6 // #UD
	vec_noerr 7 // #NM
	vec_err 8 // #DF
	vec_noerr 9
	vec_err 10 // #TS
	vec_err 11 // #NP
	vec_err 12 // #SS
	vec_err 13 // #GP
	vec_err 14 // #PF
	vec_noerr 15
SYM_CODE_END(kexec_debug_exc_vectors)

SYM_CODE_START_LOCAL_NOALIGN(exc_handler)
	/* No need for RET mitigations during kexec */
	VALIDATE_UNRET_END

	pushq %rax
	pushq %rbx
	pushq %rcx
	pushq %rdx
	pushq %rsi

	/* Stack frame */
#define EXC_SS		0x58 /* Architectural... */
#define EXC_RSP		0x50
#define EXC_EFLAGS	0x48
#define EXC_CS		0x40
#define EXC_RIP		0x38
#define EXC_ERRORCODE	0x30 /* Either architectural or zero pushed by handler */
#define EXC_EXCEPTION	0x28 /* Pushed by handler entry point */
#define EXC_RAX		0x20 /* Pushed just above in exc_handler */
#define EXC_RBX		0x18
#define EXC_RCX		0x10
#define EXC_RDX		0x08
#define EXC_RSI		0x00

	/* Set up %rdx/%rsi for debug output */
	pr_setup

	/* rip and exception info */
	print_reg 'E', 'x', 'c', ':', EXC_EXCEPTION(%rsp)
	print_reg 'E', 'r', 'r', ':', EXC_ERRORCODE(%rsp)
	print_reg 'r', 'i', 'p', ':', EXC_RIP(%rsp)
	print_reg 'r', 's', 'p', ':', EXC_RSP(%rsp)

	/* We spilled these to the stack */
	print_reg 'r', 'a', 'x', ':', EXC_RAX(%rsp)
	print_reg 'r', 'b', 'x', ':', EXC_RBX(%rsp)
	print_reg 'r', 'c', 'x', ':', EXC_RCX(%rsp)
	print_reg 'r', 'd', 'x', ':', EXC_RDX(%rsp)
	print_reg 'r', 's', 'i', ':', EXC_RSI(%rsp)

	/* Other registers untouched */
	print_reg 'r', 'd', 'i', ':', %rdi
	print_reg 'r', '8', ' ', ':', %r8
	print_reg 'r', '9', ' ', ':', %r9
	print_reg 'r', '1', '0', ':', %r10
	print_reg 'r', '1', '1', ':', %r11
	print_reg 'r', '1', '2', ':', %r12
	print_reg 'r', '1', '3', ':', %r13
	print_reg 'r', '1', '4', ':', %r14
	print_reg 'r', '1', '5', ':', %r15
	print_reg 'c', 'r', '2', ':', %cr2

	/* Only return from INT3 */
	cmpq $3, EXC_EXCEPTION(%rsp)
	jne .Ldie

	popq %rsi
	popq %rdx
	popq %rcx
	popq %rbx
	popq %rax

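	/* Drop the vector number and error code, then return from the exception */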
	addq $16, %rsp
	iretq

.Ldie:
	hlt
	jmp .Ldie

SYM_CODE_END(exc_handler)