/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Normally compiler builtins are used, but sometimes the compiler calls out
 * of line code. Based on asm-i386/string.h.
 *
 * This assembly file is rewritten from the memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

#undef memmove

.section .noinstr.text, "ax"

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 */
SYM_TYPED_FUNC_START(__memmove)

	mov %rdi, %rax

	/* Decide forward/backward copy mode */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f
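	/*
	 * src < dest, but src + count <= dest: the regions do not overlap,
	 * so the forward copy below is still safe.
	 */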

#define CHECK_LEN	cmp $0x20, %rdx; jb 1f
#define MEMMOVE_BYTES	movq %rdx, %rcx; rep movsb; RET
.Lmemmove_begin_forward:
	ALTERNATIVE_2 __stringify(CHECK_LEN), \
		      __stringify(CHECK_LEN; MEMMOVE_BYTES), X86_FEATURE_ERMS, \
		      __stringify(MEMMOVE_BYTES), X86_FEATURE_FSRM
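	/*
	 * Default: copies shorter than 32 bytes go to the register tail
	 *          at 1f, longer ones fall through to the loop/movsq code
	 *          below.
	 * ERMS:    same length check, but copies of 32 bytes or more are
	 *          done with rep movsb.
	 * FSRM:    rep movsb is fast even for short copies, so use it for
	 *          every length.
	 */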

	/*
	 * The movsq instruction has a high startup latency, so small
	 * copies are done with general-purpose registers instead.
	 */
	cmp $680, %rdx
	jb 3f
	/*
	 * movsq is only a win when source and destination are equally
	 * aligned; comparing the low bytes of the two pointers is a
	 * cheap check for that.
	 */

	cmpb %dil, %sil
	je 4f
3:
	sub $0x20, %rdx
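	/*
	 * Bias the count down by 32 so the loop below can use the borrow
	 * from its own "sub $0x20" as the exit test without copying one
	 * chunk too many; the bias is removed again after the loop.
	 */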
	/*
	 * We gobble 32 bytes forward in each loop.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b
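	/*
	 * Undo the bias; %rdx now holds the remaining 0..31 bytes, which
	 * the tail code at 1f finishes off.
	 */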
	addq $0x20, %rdx
	jmp 1f
	/*
	 * Handle data forward by movsq.
	 */
	.p2align 4
4:
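	/*
	 * rep movsq copies count/8 qwords.  The last qword of the source
	 * is saved in %r11 up front, before rep movsq can overwrite it,
	 * and stored at the end of the destination afterwards, which also
	 * covers the trailing count%8 bytes.
	 */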
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
.Lmemmove_end_forward:

	/*
	 * Handle data backward by movsq.
	 */
	.p2align 4
7:
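	/*
	 * Mirror image of the forward movsq path: the first qword of the
	 * source is saved in %r11 before the copy, rep movsq runs with
	 * the direction flag set so qwords are copied from the end toward
	 * the start, and the saved qword is stored last to cover the
	 * leading count%8 bytes.
	 */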
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f

	/*
	 * Prepare for the backward copy: src < dest and the regions
	 * overlap, so we must copy from the tail down.
	 */
	.p2align 4
2:
	cmp $0x20, %rdx
	jb 1f
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
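	/* Mid-sized or differently aligned copy: use the register loop. */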
6:
	/*
	 * Advance src and dest to the end of the buffers so we can copy
	 * from the tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Undo the bias and step src and dest back to the start of the
	 * remaining head bytes.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
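	/*
	 * Common tail: copy the remaining 0..31 bytes.  Every case below
	 * loads all of its data into registers before storing anything,
	 * so it is safe regardless of how src and dest overlap.
	 */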
1:
	cmpq $16, %rdx
	jb 9f
	/*
	 * Copy 16 to 31 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Copy 8 to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Copy 4 to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Copy 2 to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Copy the final byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	RET
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)

SYM_FUNC_ALIAS_MEMFUNC(memmove, __memmove)
EXPORT_SYMBOL(memmove)