/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2023 Linus Torvalds <torvalds@linux-foundation.org>
 */

#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/objtool.h>
#include <asm/asm.h>

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 *
 * This copies from user space into kernel space, but the kernel
 * space accesses can take a machine check exception, so they too
 * need exception handling.
 *
 * Note: only 32-bit and 64-bit stores have non-temporal versions,
 * and we only use aligned versions. Any unaligned parts at the
 * start or end of the copy will be done using normal cached stores.
 *
 * Input:
 * rdi destination
 * rsi source
 * edx count
 *
 * Output:
 * rax uncopied bytes or 0 if successful.
 */
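/*
 * A minimal C-level caller sketch (hypothetical: the exact in-tree
 * wrapper and declaration, e.g. in arch/x86/include/asm/uaccess_64.h,
 * may differ):
 *
 *	unsigned long left = __copy_user_nocache(dst, usrc, len);
 *	if (left)	// 'left' bytes at the tail were not copied
 *		return -EFAULT;
 */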
SYM_FUNC_START(__copy_user_nocache)
	ANNOTATE_NOENDBR
	/* If destination is not 8-byte aligned, we'll have to align it */
	testb $7,%dil
	jne .Lalign

.Lis_aligned:
	cmp $64,%edx
	jb .Lquadwords

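	/*
	 * Main loop: 64 bytes per iteration. All four user-space loads
	 * in each half are issued before their stores, so a faulting
	 * load can be recovered before anything (first half) or after
	 * only 32 bytes (second half) have been written. The .p2align
	 * below pads with NOPs (0x90) to 16-byte align the loop entry.
	 */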
	.p2align 4,0x90
.Lunrolled:
10:	movq (%rsi),%r8
11:	movq 8(%rsi),%r9
12:	movq 16(%rsi),%r10
13:	movq 24(%rsi),%r11
20:	movnti %r8,(%rdi)
21:	movnti %r9,8(%rdi)
22:	movnti %r10,16(%rdi)
23:	movnti %r11,24(%rdi)
30:	movq 32(%rsi),%r8
31:	movq 40(%rsi),%r9
32:	movq 48(%rsi),%r10
33:	movq 56(%rsi),%r11
40:	movnti %r8,32(%rdi)
41:	movnti %r9,40(%rdi)
42:	movnti %r10,48(%rdi)
43:	movnti %r11,56(%rdi)

	addq $64,%rsi
	addq $64,%rdi
	sub $64,%edx
	cmp $64,%edx
	jae .Lunrolled

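	/*
	 * Each _ASM_EXTABLE_UA(insn, handler) below adds an exception
	 * table entry: a user-access fault at 'insn' resumes execution
	 * at 'handler' instead of being treated as a kernel bug.
	 */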
	/*
	 * First set of user mode loads have been done
	 * without any stores, so if they fail, we can
	 * just try the non-unrolled loop.
	 */
	_ASM_EXTABLE_UA(10b, .Lquadwords)
	_ASM_EXTABLE_UA(11b, .Lquadwords)
	_ASM_EXTABLE_UA(12b, .Lquadwords)
	_ASM_EXTABLE_UA(13b, .Lquadwords)

	/*
	 * The second set of user mode loads have been
	 * done with 32 bytes already stored to the
	 * destination, so we need to take that into
	 * account before falling back to the
	 * non-unrolled loop.
	 */
	_ASM_EXTABLE_UA(30b, .Lfixup32)
	_ASM_EXTABLE_UA(31b, .Lfixup32)
	_ASM_EXTABLE_UA(32b, .Lfixup32)
	_ASM_EXTABLE_UA(33b, .Lfixup32)

	/*
	 * An exception on a write means that we're
	 * done, but we need to update the count
	 * depending on where in the unrolled loop
	 * we were.
	 */
	_ASM_EXTABLE_UA(20b, .Ldone0)
	_ASM_EXTABLE_UA(21b, .Ldone8)
	_ASM_EXTABLE_UA(22b, .Ldone16)
	_ASM_EXTABLE_UA(23b, .Ldone24)
	_ASM_EXTABLE_UA(40b, .Ldone32)
	_ASM_EXTABLE_UA(41b, .Ldone40)
	_ASM_EXTABLE_UA(42b, .Ldone48)
	_ASM_EXTABLE_UA(43b, .Ldone56)

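	/*
	 * Tail loop: copy one aligned quadword at a time, uncached,
	 * while at least 8 bytes remain.
	 */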
.Lquadwords:
	cmp $8,%edx
	jb .Llong
50:	movq (%rsi),%rax
51:	movnti %rax,(%rdi)
	addq $8,%rsi
	addq $8,%rdi
	sub $8,%edx
	jmp .Lquadwords

	/*
	 * If we fail on the last full quadword, we will
	 * not try to do any byte-wise cached accesses.
	 * We will try to do one more 4-byte uncached
	 * one, though.
	 */
	_ASM_EXTABLE_UA(50b, .Llast4)
	_ASM_EXTABLE_UA(51b, .Ldone0)

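	/*
	 * The 2-byte and 1-byte tails below must use normal cached
	 * stores (movnti only exists for 32-bit and 64-bit operands),
	 * so the sfence at .Lword fences all earlier non-temporal
	 * stores before the function returns.
	 */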
.Llong:
	test $4,%dl
	je .Lword
60:	movl (%rsi),%eax
61:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
.Lword:
	sfence
	test $2,%dl
	je .Lbyte
70:	movw (%rsi),%ax
71:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lbyte:
	test $1,%dl
	je .Ldone
80:	movb (%rsi),%al
81:	movb %al,(%rdi)
	dec %edx
.Ldone:
	mov %edx,%eax
	RET

	/*
	 * If we fail on the last four bytes, we won't
	 * bother with any fixups. It's dead, Jim. Note
	 * that there's no need for 'sfence' for any
	 * of this, since the exception will have been
	 * serializing.
	 */
	_ASM_EXTABLE_UA(60b, .Ldone)
	_ASM_EXTABLE_UA(61b, .Ldone)
	_ASM_EXTABLE_UA(70b, .Ldone)
	_ASM_EXTABLE_UA(71b, .Ldone)
	_ASM_EXTABLE_UA(80b, .Ldone)
	_ASM_EXTABLE_UA(81b, .Ldone)

	/*
	 * This is the "head needs aligning" case when
	 * the destination isn't 8-byte aligned. The
	 * 4-byte case can be done uncached, but any
	 * smaller alignment is done with regular stores.
	 */
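	/*
	 * Example: a destination ending in ...0x3 copies one cached
	 * byte (pointer now ends in ...0x4), skips the 2-byte step
	 * since bit 1 of the updated pointer is clear, then does one
	 * uncached 4-byte movnti (now ...0x8, fully aligned).
	 */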
.Lalign:
	test $1,%dil
	je .Lalign_word
	test %edx,%edx
	je .Ldone
90:	movb (%rsi),%al
91:	movb %al,(%rdi)
	inc %rsi
	inc %rdi
	dec %edx
.Lalign_word:
	test $2,%dil
	je .Lalign_long
	cmp $2,%edx
	jb .Lbyte
92:	movw (%rsi),%ax
93:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lalign_long:
	test $4,%dil
	je .Lis_aligned
	cmp $4,%edx
	jb .Lword
94:	movl (%rsi),%eax
95:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
	jmp .Lis_aligned

	/*
	 * If we fail on the initial alignment accesses,
	 * we're all done. Again, no point in trying to
	 * do byte-by-byte probing if the 4-byte load
	 * fails - we're not doing any uncached accesses
	 * any more.
	 */
	_ASM_EXTABLE_UA(90b, .Ldone)
	_ASM_EXTABLE_UA(91b, .Ldone)
	_ASM_EXTABLE_UA(92b, .Ldone)
	_ASM_EXTABLE_UA(93b, .Ldone)
	_ASM_EXTABLE_UA(94b, .Ldone)
	_ASM_EXTABLE_UA(95b, .Ldone)

	/*
	 * Exception table fixups for faults in the middle
	 */
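	/*
	 * Entering at .LdoneN falls through N/8 'sub $8,%edx' steps,
	 * subtracting the N bytes already stored in the faulting
	 * iteration: e.g. a fault at 40: lands on .Ldone32 and returns
	 * %edx minus 32.
	 */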
.Ldone56: sub $8,%edx
.Ldone48: sub $8,%edx
.Ldone40: sub $8,%edx
.Ldone32: sub $8,%edx
.Ldone24: sub $8,%edx
.Ldone16: sub $8,%edx
.Ldone8: sub $8,%edx
.Ldone0:
	mov %edx,%eax
	RET

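	/*
	 * The first half of the faulting iteration (32 bytes) was
	 * already stored, so step past it before retrying the rest
	 * through the quadword loop.
	 */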
.Lfixup32:
	addq $32,%rsi
	addq $32,%rdi
	sub $32,%edx
	jmp .Lquadwords

.Llast4:
52:	movl (%rsi),%eax
53:	movnti %eax,(%rdi)
	sfence
	sub $4,%edx
	mov %edx,%eax
	RET
	_ASM_EXTABLE_UA(52b, .Ldone0)
	_ASM_EXTABLE_UA(53b, .Ldone0)

SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)