/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <linux/objtool.h>
#include <asm/asm.h>

/*
 * Most CPUs support the enhanced REP MOVSB/STOSB (ERMS) instructions.
 * It is recommended to use them when possible, and we do use them by
 * default.  If ERMS is not available, try the fast-string variant
 * (clear_page_rep).  Otherwise, fall back to the original unrolled
 * loop (clear_page_orig).
 */
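
/*
 * None of the clear_page variants below is called directly by generic
 * code; the variant is patched in at the call site with alternatives.
 * As a rough sketch of what clear_page() in <asm/page_64.h> does (the
 * exact macros and feature flags may differ between kernel versions):
 *
 *	asm volatile(ALTERNATIVE_2("call clear_page_orig",
 *				   "call clear_page_rep", X86_FEATURE_REP_GOOD,
 *				   "call clear_page_erms", X86_FEATURE_ERMS)
 *		     : "+D" (page)
 *		     : : "cc", "memory", "rax", "rcx");
 */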

/*
 * Zero a page.
 * %rdi - page
 */
SYM_TYPED_FUNC_START(clear_page_rep)
	movl $4096/8,%ecx	/* 512 qword stores */
	xorl %eax,%eax		/* value to store: zero */
	rep stosq
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)

SYM_TYPED_FUNC_START(clear_page_orig)
	xorl %eax,%eax
	movl $4096/64,%ecx	/* 64 iterations, 64 bytes each */
	.p2align 4
.Lloop:
	decl %ecx
#define PUT(x) movq %rax,x*8(%rdi)
	movq %rax,(%rdi)
	PUT(1)
	PUT(2)
	PUT(3)
	PUT(4)
	PUT(5)
	PUT(6)
	PUT(7)
	leaq 64(%rdi),%rdi	/* lea does not clobber the flags from decl */
	jnz .Lloop
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)

SYM_TYPED_FUNC_START(clear_page_erms)
	movl $4096,%ecx		/* byte count, not qwords */
	xorl %eax,%eax
	rep stosb
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
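
/*
 * Note on the byte-count form above: on ERMS parts the string-op
 * microcode is expected to pick wide chunks internally, so "rep stosb"
 * with a byte count performs as well as or better than "rep stosq"
 * with a qword count, and there is no remainder handling to get wrong.
 * (This is a general statement from the Intel SDM's description of
 * ERMS, not something this file enforces.)
 */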

/*
 * Default clear user-space.
 * Input:
 * rdi destination
 * rcx count
 * rax is zero (pre-set by the caller)
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
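/*
 * A sketch of the expected caller, modelled on __clear_user() in
 * <asm/uaccess_64.h> (details vary by kernel version): the caller
 * zeroes %rax, opens a STAC/CLAC window, and uses an alternative so
 * that this function only runs when the CPU lacks fast short REP STOS:
 *
 *	stac();
 *	asm volatile("1:\n\t"
 *		     ALTERNATIVE("rep stosb",
 *				 "call rep_stos_alternative",
 *				 ALT_NOT(X86_FEATURE_FSRS))
 *		     "2:\n"
 *		     _ASM_EXTABLE_UA(1b, 2b)
 *		     : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
 *		     : "a" (0));
 *	clac();
 *	return size;	// uncleared bytes, 0 on success
 */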
SYM_FUNC_START(rep_stos_alternative)
	ANNOTATE_NOENDBR	/* only ever called directly, never indirectly */
	cmpq $64,%rcx
	jae .Lunrolled		/* 64 bytes or more: unrolled qword stores */

	cmp $8,%ecx		/* count < 64 here, so 32-bit ops are safe */
	jae .Lword		/* 8..63 bytes: a qword at a time */

	testl %ecx,%ecx
	je .Lexit		/* zero bytes: nothing to do */

.Lclear_user_tail:
0:	movb %al,(%rdi)
	inc %rdi
	dec %rcx
	jnz .Lclear_user_tail
.Lexit:
	RET

	_ASM_EXTABLE_UA( 0b, .Lexit)

.Lword:
1:	movq %rax,(%rdi)
	addq $8,%rdi
	sub $8,%ecx
	je .Lexit
	cmp $8,%ecx
	jae .Lword
	jmp .Lclear_user_tail

	.p2align 4
.Lunrolled:
10:	movq %rax,(%rdi)
11:	movq %rax,8(%rdi)
12:	movq %rax,16(%rdi)
13:	movq %rax,24(%rdi)
14:	movq %rax,32(%rdi)
15:	movq %rax,40(%rdi)
16:	movq %rax,48(%rdi)
17:	movq %rax,56(%rdi)
	addq $64,%rdi
	subq $64,%rcx
	cmpq $64,%rcx
	jae .Lunrolled
	cmpl $8,%ecx
	jae .Lword
	testl %ecx,%ecx
	jne .Lclear_user_tail
	RET

	/*
	 * If we take an exception on any of the
	 * word stores, we know that %rcx isn't zero,
	 * so we can just go to the tail clearing to
	 * get the exact count.
	 *
	 * The unrolled case might end up clearing
	 * some bytes twice. Don't care.
	 *
	 * We could use the value in %rdi to avoid
	 * a second fault on the exact count case,
	 * but do we really care? No.
	 *
	 * Finally, we could try to align %rdi at the
	 * top of the unrolling. But unaligned stores
	 * just aren't that common or expensive.
	 */
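	/*
	 * Worked example, reading only the code above: if the store at
	 * 13: faults, %rcx still holds the full remaining count (the
	 * subq $64 below it has not executed yet), so .Lclear_user_tail
	 * re-walks the same range byte by byte.  The 24 bytes already
	 * written by 10:, 11: and 12: are cleared again (harmless)
	 * until the byte store at 0: faults on the same address,
	 * leaving the exact uncleared count in %rcx for the caller.
	 */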
	_ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(10b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(11b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(12b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(13b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(14b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(15b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(16b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(17b, .Lclear_user_tail)
SYM_FUNC_END(rep_stos_alternative)
EXPORT_SYMBOL(rep_stos_alternative)