| 1 | /* | 
|---|
| 2 | * Copyright © 2016 Intel Corporation | 
|---|
| 3 | * | 
|---|
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | 
|---|
| 5 | * copy of this software and associated documentation files (the "Software"), | 
|---|
| 6 | * to deal in the Software without restriction, including without limitation | 
|---|
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | 
|---|
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | 
|---|
| 9 | * Software is furnished to do so, subject to the following conditions: | 
|---|
| 10 | * | 
|---|
| 11 | * The above copyright notice and this permission notice (including the next | 
|---|
| 12 | * paragraph) shall be included in all copies or substantial portions of the | 
|---|
| 13 | * Software. | 
|---|
| 14 | * | 
|---|
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|---|
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|---|
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL | 
|---|
| 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|---|
| 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 
|---|
| 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 
|---|
| 21 | * IN THE SOFTWARE. | 
|---|
| 22 | * | 
|---|
| 23 | */ | 
|---|
| 24 |  | 
|---|
| 25 | #include <linux/kernel.h> | 
|---|
| 26 | #include <linux/string.h> | 
|---|
| 27 | #include <linux/cpufeature.h> | 
|---|
| 28 | #include <linux/bug.h> | 
|---|
| 29 | #include <linux/build_bug.h> | 
|---|
| 30 | #include <asm/fpu/api.h> | 
|---|
| 31 |  | 
|---|
| 32 | #include "i915_memcpy.h" | 
|---|
| 33 |  | 
|---|
/*
 * CI_BUG_ON() - assertion that is only live in i915 debug builds.
 *
 * With CONFIG_DRM_I915_DEBUG enabled it expands to BUG_ON(expr). Otherwise
 * it expands to BUILD_BUG_ON_INVALID(expr), which (per <linux/build_bug.h>)
 * lets the compiler check that the expression is valid without generating
 * any code for it.
 */
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
#define CI_BUG_ON(expr) BUG_ON(expr)
#else
#define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
#endif

/*
 * Static key, initially false, gating the movntdqa-based copy path. It is
 * switched on by i915_memcpy_init_early() and tested by
 * i915_memcpy_from_wc().
 */
static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
|---|
| 41 |  | 
|---|
| 42 | static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) | 
|---|
| 43 | { | 
|---|
| 44 | kernel_fpu_begin(); | 
|---|
| 45 |  | 
|---|
| 46 | while (len >= 4) { | 
|---|
| 47 | asm( "movntdqa   (%0), %%xmm0\n" | 
|---|
| 48 | "movntdqa 16(%0), %%xmm1\n" | 
|---|
| 49 | "movntdqa 32(%0), %%xmm2\n" | 
|---|
| 50 | "movntdqa 48(%0), %%xmm3\n" | 
|---|
| 51 | "movaps %%xmm0,   (%1)\n" | 
|---|
| 52 | "movaps %%xmm1, 16(%1)\n" | 
|---|
| 53 | "movaps %%xmm2, 32(%1)\n" | 
|---|
| 54 | "movaps %%xmm3, 48(%1)\n" | 
|---|
| 55 | :: "r"(src), "r"(dst) : "memory"); | 
|---|
| 56 | src += 64; | 
|---|
| 57 | dst += 64; | 
|---|
| 58 | len -= 4; | 
|---|
| 59 | } | 
|---|
| 60 | while (len--) { | 
|---|
| 61 | asm( "movntdqa (%0), %%xmm0\n" | 
|---|
| 62 | "movaps %%xmm0, (%1)\n" | 
|---|
| 63 | :: "r"(src), "r"(dst) : "memory"); | 
|---|
| 64 | src += 16; | 
|---|
| 65 | dst += 16; | 
|---|
| 66 | } | 
|---|
| 67 |  | 
|---|
| 68 | kernel_fpu_end(); | 
|---|
| 69 | } | 
|---|
| 70 |  | 
|---|
| 71 | static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len) | 
|---|
| 72 | { | 
|---|
| 73 | kernel_fpu_begin(); | 
|---|
| 74 |  | 
|---|
| 75 | while (len >= 4) { | 
|---|
| 76 | asm( "movntdqa   (%0), %%xmm0\n" | 
|---|
| 77 | "movntdqa 16(%0), %%xmm1\n" | 
|---|
| 78 | "movntdqa 32(%0), %%xmm2\n" | 
|---|
| 79 | "movntdqa 48(%0), %%xmm3\n" | 
|---|
| 80 | "movups %%xmm0,   (%1)\n" | 
|---|
| 81 | "movups %%xmm1, 16(%1)\n" | 
|---|
| 82 | "movups %%xmm2, 32(%1)\n" | 
|---|
| 83 | "movups %%xmm3, 48(%1)\n" | 
|---|
| 84 | :: "r"(src), "r"(dst) : "memory"); | 
|---|
| 85 | src += 64; | 
|---|
| 86 | dst += 64; | 
|---|
| 87 | len -= 4; | 
|---|
| 88 | } | 
|---|
| 89 | while (len--) { | 
|---|
| 90 | asm( "movntdqa (%0), %%xmm0\n" | 
|---|
| 91 | "movups %%xmm0, (%1)\n" | 
|---|
| 92 | :: "r"(src), "r"(dst) : "memory"); | 
|---|
| 93 | src += 16; | 
|---|
| 94 | dst += 16; | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | kernel_fpu_end(); | 
|---|
| 98 | } | 
|---|
| 99 |  | 
|---|
| 100 | /** | 
|---|
| 101 | * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC | 
|---|
| 102 | * @dst: destination pointer | 
|---|
| 103 | * @src: source pointer | 
|---|
| 104 | * @len: how many bytes to copy | 
|---|
| 105 | * | 
|---|
| 106 | * i915_memcpy_from_wc copies @len bytes from @src to @dst using | 
|---|
| 107 | * non-temporal instructions where available. Note that all arguments | 
|---|
| 108 | * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple | 
|---|
| 109 | * of 16. | 
|---|
| 110 | * | 
|---|
| 111 | * To test whether accelerated reads from WC are supported, use | 
|---|
| 112 | * i915_memcpy_from_wc(NULL, NULL, 0); | 
|---|
| 113 | * | 
|---|
| 114 | * Returns true if the copy was successful, false if the preconditions | 
|---|
| 115 | * are not met. | 
|---|
| 116 | */ | 
|---|
| 117 | bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len) | 
|---|
| 118 | { | 
|---|
| 119 | if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15)) | 
|---|
| 120 | return false; | 
|---|
| 121 |  | 
|---|
| 122 | if (static_branch_likely(&has_movntdqa)) { | 
|---|
| 123 | if (likely(len)) | 
|---|
| 124 | __memcpy_ntdqa(dst, src, len: len >> 4); | 
|---|
| 125 | return true; | 
|---|
| 126 | } | 
|---|
| 127 |  | 
|---|
| 128 | return false; | 
|---|
| 129 | } | 
|---|
| 130 |  | 
|---|
/**
 * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
 * @dst: destination pointer
 * @src: source pointer
 * @len: how many bytes to copy
 *
 * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from
 * @src to @dst using non-temporal instructions where available. Unlike
 * i915_memcpy_from_wc(), it accepts arguments that are not 16-byte aligned,
 * provided they are valid for the potential 16-byte read past the end.
 *
 * Note that the accelerated part of the copy always moves whole 16-byte
 * chunks (the remaining length is rounded up), so up to 15 bytes beyond
 * @len may be both read from @src and written to @dst.
 */
void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
	unsigned long addr;

	CI_BUG_ON(!i915_has_memcpy_from_wc());

	addr = (unsigned long)src;
	if (!IS_ALIGNED(addr, 16)) {
		/*
		 * Copy the unaligned head with plain memcpy() so that @src is
		 * 16-byte aligned for the chunked copy below; never copy more
		 * than @len bytes here.
		 */
		unsigned long x = min(ALIGN(addr, 16) - addr, len);

		memcpy(dst, src, x);

		len -= x;
		dst += x;
		src += x;
	}

	/* The helper counts 16-byte chunks; a partial last chunk rounds up. */
	if (likely(len))
		__memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16));
}
|---|
| 162 |  | 
|---|
| 163 | void i915_memcpy_init_early(struct drm_i915_private *dev_priv) | 
|---|
| 164 | { | 
|---|
| 165 | /* | 
|---|
| 166 | * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions | 
|---|
| 167 | * emulation. So don't enable movntdqa in hypervisor guest. | 
|---|
| 168 | */ | 
|---|
| 169 | if (static_cpu_has(X86_FEATURE_XMM4_1) && | 
|---|
| 170 | !boot_cpu_has(X86_FEATURE_HYPERVISOR)) | 
|---|
| 171 | static_branch_enable(&has_movntdqa); | 
|---|
| 172 | } | 
|---|
| 173 |  | 
|---|