// SPDX-License-Identifier: GPL-2.0-only
/*
 * User address space access functions.
 *
 * Copyright 1997 Andi Kleen <ak@muc.de>
 * Copyright 1997 Linus Torvalds
 * Copyright 2002 Andi Kleen <ak@suse.de>
 */
#include <linux/export.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <linux/libnvdimm.h>

/*
 * Zero Userspace
 */

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * clean_cache_range - write back a cache range with CLWB
 * @addr:	virtual start address
 * @size:	number of bytes to write back
 *
 * Write back a cache range using the CLWB (cache line write back)
 * instruction. Note that @size is internally rounded up to be cache
 * line size aligned.
 */
static void clean_cache_range(void *addr, size_t size)
{
	u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
	unsigned long clflush_mask = x86_clflush_size - 1;
	void *vend = addr + size;
	void *p;

	for (p = (void *)((unsigned long)addr & ~clflush_mask);
	     p < vend; p += x86_clflush_size)
		clwb(p);
}

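/* Write a pmem range back from the CPU caches (CLWB via clean_cache_range()). */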
void arch_wb_cache_pmem(void *addr, size_t size)
{
	clean_cache_range(addr, size);
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);

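/*
 * Copy from user space and make sure the destination range has been
 * written back from the CPU caches by the time the copy returns.
 */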
long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
{
	unsigned long flushed, dest = (unsigned long) dst;
	long rc;

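	/* stac()/clac() bracket the user access so it is permitted under SMAP. */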
	stac();
	rc = __copy_user_nocache(dst, src, size);
	clac();

	/*
	 * __copy_user_nocache() uses non-temporal stores for the bulk
	 * of the transfer, but we need to manually flush if the
	 * transfer is unaligned. A cached memory copy is used when
	 * destination or size is not naturally aligned. That is:
	 *   - Require 8-byte alignment when size is 8 bytes or larger.
	 *   - Require 4-byte alignment when size is 4 bytes.
	 */
	if (size < 8) {
		if (!IS_ALIGNED(dest, 4) || size != 4)
			clean_cache_range(dst, size);
	} else {
		if (!IS_ALIGNED(dest, 8)) {
			dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
			clean_cache_range(dst, 1);
		}

		flushed = dest - (unsigned long) dst;
		if (size > flushed && !IS_ALIGNED(size - flushed, 8))
			clean_cache_range(dst + size - 1, 1);
	}

	return rc;
}

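/*
 * Like memcpy(), but the destination range is not left dirty in the CPU
 * caches: the bulk is copied with non-temporal (movnti) stores, and any
 * unaligned head/tail that goes through the cache is written back
 * explicitly. Callers normally reach this through the memcpy_flushcache()
 * wrapper.
 */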
void __memcpy_flushcache(void *_dst, const void *_src, size_t size)
{
	unsigned long dest = (unsigned long) _dst;
	unsigned long source = (unsigned long) _src;

	/* cache copy and flush to align dest */
	if (!IS_ALIGNED(dest, 8)) {
		size_t len = min_t(size_t, size, ALIGN(dest, 8) - dest);

		memcpy((void *) dest, (void *) source, len);
		clean_cache_range((void *) dest, len);
		dest += len;
		source += len;
		size -= len;
		if (!size)
			return;
	}

	/* 4x8 movnti loop */
	while (size >= 32) {
		asm("movq    (%0), %%r8\n"
		    "movq   8(%0), %%r9\n"
		    "movq  16(%0), %%r10\n"
		    "movq  24(%0), %%r11\n"
		    "movnti  %%r8,   (%1)\n"
		    "movnti  %%r9,  8(%1)\n"
		    "movnti %%r10, 16(%1)\n"
		    "movnti %%r11, 24(%1)\n"
		    :: "r" (source), "r" (dest)
		    : "memory", "r8", "r9", "r10", "r11");
		dest += 32;
		source += 32;
		size -= 32;
	}

	/* 1x8 movnti loop */
	while (size >= 8) {
		asm("movq    (%0), %%r8\n"
		    "movnti  %%r8,   (%1)\n"
		    :: "r" (source), "r" (dest)
		    : "memory", "r8");
		dest += 8;
		source += 8;
		size -= 8;
	}

	/* 1x4 movnti loop */
	while (size >= 4) {
		asm("movl    (%0), %%r8d\n"
		    "movnti  %%r8d,   (%1)\n"
		    :: "r" (source), "r" (dest)
		    : "memory", "r8");
		dest += 4;
		source += 4;
		size -= 4;
	}

	/* cache copy for remaining bytes */
	if (size) {
		memcpy((void *) dest, (void *) source, size);
		clean_cache_range((void *) dest, size);
	}
}
EXPORT_SYMBOL_GPL(__memcpy_flushcache);
#endif