| 1 | // SPDX-License-Identifier: MIT | 
|---|
| 2 | /* | 
|---|
| 3 | * Copyright © 2020 Intel Corporation | 
|---|
| 4 | */ | 
|---|
| 5 |  | 
|---|
| 6 | #include <linux/log2.h> | 
|---|
| 7 |  | 
|---|
| 8 | #include "gem/i915_gem_internal.h" | 
|---|
| 9 | #include "gem/i915_gem_lmem.h" | 
|---|
| 10 |  | 
|---|
| 11 | #include "gen8_ppgtt.h" | 
|---|
| 12 | #include "i915_scatterlist.h" | 
|---|
| 13 | #include "i915_trace.h" | 
|---|
| 14 | #include "i915_pvinfo.h" | 
|---|
| 15 | #include "i915_vgpu.h" | 
|---|
| 16 | #include "intel_gt.h" | 
|---|
| 17 | #include "intel_gtt.h" | 
|---|
| 18 |  | 
|---|
| 19 | static u64 gen8_pde_encode(const dma_addr_t addr, | 
|---|
| 20 | const enum i915_cache_level level) | 
|---|
| 21 | { | 
|---|
| 22 | u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; | 
|---|
| 23 |  | 
|---|
| 24 | if (level != I915_CACHE_NONE) | 
|---|
| 25 | pde |= PPAT_CACHED_PDE; | 
|---|
| 26 | else | 
|---|
| 27 | pde |= PPAT_UNCACHED; | 
|---|
| 28 |  | 
|---|
| 29 | return pde; | 
|---|
| 30 | } | 
|---|
| 31 |  | 
|---|
| 32 | static u64 gen8_pte_encode(dma_addr_t addr, | 
|---|
| 33 | unsigned int pat_index, | 
|---|
| 34 | u32 flags) | 
|---|
| 35 | { | 
|---|
| 36 | gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; | 
|---|
| 37 |  | 
|---|
| 38 | if (unlikely(flags & PTE_READ_ONLY)) | 
|---|
| 39 | pte &= ~GEN8_PAGE_RW; | 
|---|
| 40 |  | 
|---|
| 41 | /* | 
|---|
| 42 | * For pre-gen12 platforms pat_index is the same as enum | 
|---|
| 43 | * i915_cache_level, so the switch-case here is still valid. | 
|---|
| 44 | * See translation table defined by LEGACY_CACHELEVEL. | 
|---|
| 45 | */ | 
|---|
| 46 | switch (pat_index) { | 
|---|
| 47 | case I915_CACHE_NONE: | 
|---|
| 48 | pte |= PPAT_UNCACHED; | 
|---|
| 49 | break; | 
|---|
| 50 | case I915_CACHE_WT: | 
|---|
| 51 | pte |= PPAT_DISPLAY_ELLC; | 
|---|
| 52 | break; | 
|---|
| 53 | default: | 
|---|
| 54 | pte |= PPAT_CACHED; | 
|---|
| 55 | break; | 
|---|
| 56 | } | 
|---|
| 57 |  | 
|---|
| 58 | return pte; | 
|---|
| 59 | } | 
|---|
| 60 |  | 
|---|
| 61 | static u64 gen12_pte_encode(dma_addr_t addr, | 
|---|
| 62 | unsigned int pat_index, | 
|---|
| 63 | u32 flags) | 
|---|
| 64 | { | 
|---|
| 65 | gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; | 
|---|
| 66 |  | 
|---|
| 67 | if (unlikely(flags & PTE_READ_ONLY)) | 
|---|
| 68 | pte &= ~GEN8_PAGE_RW; | 
|---|
| 69 |  | 
|---|
| 70 | if (flags & PTE_LM) | 
|---|
| 71 | pte |= GEN12_PPGTT_PTE_LM; | 
|---|
| 72 |  | 
|---|
| 73 | if (pat_index & BIT(0)) | 
|---|
| 74 | pte |= GEN12_PPGTT_PTE_PAT0; | 
|---|
| 75 |  | 
|---|
| 76 | if (pat_index & BIT(1)) | 
|---|
| 77 | pte |= GEN12_PPGTT_PTE_PAT1; | 
|---|
| 78 |  | 
|---|
| 79 | if (pat_index & BIT(2)) | 
|---|
| 80 | pte |= GEN12_PPGTT_PTE_PAT2; | 
|---|
| 81 |  | 
|---|
| 82 | if (pat_index & BIT(3)) | 
|---|
| 83 | pte |= MTL_PPGTT_PTE_PAT3; | 
|---|
| 84 |  | 
|---|
| 85 | return pte; | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) | 
|---|
| 89 | { | 
|---|
| 90 | struct drm_i915_private *i915 = ppgtt->vm.i915; | 
|---|
| 91 | struct intel_uncore *uncore = ppgtt->vm.gt->uncore; | 
|---|
| 92 | enum vgt_g2v_type msg; | 
|---|
| 93 | int i; | 
|---|
| 94 |  | 
|---|
| 95 | if (create) | 
|---|
| 96 | atomic_inc(px_used(ppgtt->pd)); /* never remove */ | 
|---|
| 97 | else | 
|---|
| 98 | atomic_dec(px_used(ppgtt->pd)); | 
|---|
| 99 |  | 
|---|
| 100 | mutex_lock(lock: &i915->vgpu.lock); | 
|---|
| 101 |  | 
|---|
| 102 | if (i915_vm_is_4lvl(vm: &ppgtt->vm)) { | 
|---|
| 103 | const u64 daddr = px_dma(ppgtt->pd); | 
|---|
| 104 |  | 
|---|
| 105 | intel_uncore_write(uncore, | 
|---|
| 106 | vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); | 
|---|
| 107 | intel_uncore_write(uncore, | 
|---|
| 108 | vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); | 
|---|
| 109 |  | 
|---|
| 110 | msg = create ? | 
|---|
| 111 | VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : | 
|---|
| 112 | VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY; | 
|---|
| 113 | } else { | 
|---|
| 114 | for (i = 0; i < GEN8_3LVL_PDPES; i++) { | 
|---|
| 115 | const u64 daddr = i915_page_dir_dma_addr(ppgtt, n: i); | 
|---|
| 116 |  | 
|---|
| 117 | intel_uncore_write(uncore, | 
|---|
| 118 | vgtif_reg(pdp[i].lo), | 
|---|
| 119 | lower_32_bits(daddr)); | 
|---|
| 120 | intel_uncore_write(uncore, | 
|---|
| 121 | vgtif_reg(pdp[i].hi), | 
|---|
| 122 | upper_32_bits(daddr)); | 
|---|
| 123 | } | 
|---|
| 124 |  | 
|---|
| 125 | msg = create ? | 
|---|
| 126 | VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : | 
|---|
| 127 | VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY; | 
|---|
| 128 | } | 
|---|
| 129 |  | 
|---|
| 130 | /* g2v_notify atomically (via hv trap) consumes the message packet. */ | 
|---|
| 131 | intel_uncore_write(uncore, vgtif_reg(g2v_notify), val: msg); | 
|---|
| 132 |  | 
|---|
| 133 | mutex_unlock(lock: &i915->vgpu.lock); | 
|---|
| 134 | } | 
|---|
| 135 |  | 
|---|
/*
 * Page-table geometry helpers.
 *
 * Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12]
 */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
/* Number of u64 entries that fit in one GEN8_PAGE_SIZE table */
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
/* Shift, in page-index units, of the index field for level @lvl */
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
/* Level-@lvl index of page index @i */
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
/* Same as the two helpers above, but for a byte address rather than a page index */
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))

/* Recover the enclosing i915_page_directory from a pointer to its pt member */
#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
|---|
| 146 |  | 
|---|
| 147 | static unsigned int | 
|---|
| 148 | gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx) | 
|---|
| 149 | { | 
|---|
| 150 | const int shift = gen8_pd_shift(lvl); | 
|---|
| 151 | const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); | 
|---|
| 152 |  | 
|---|
| 153 | GEM_BUG_ON(start >= end); | 
|---|
| 154 | end += ~mask >> gen8_pd_shift(1); | 
|---|
| 155 |  | 
|---|
| 156 | *idx = i915_pde_index(addr: start, shift); | 
|---|
| 157 | if ((start ^ end) & mask) | 
|---|
| 158 | return GEN8_PDES - *idx; | 
|---|
| 159 | else | 
|---|
| 160 | return i915_pde_index(addr: end, shift) - *idx; | 
|---|
| 161 | } | 
|---|
| 162 |  | 
|---|
| 163 | static bool gen8_pd_contains(u64 start, u64 end, int lvl) | 
|---|
| 164 | { | 
|---|
| 165 | const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); | 
|---|
| 166 |  | 
|---|
| 167 | GEM_BUG_ON(start >= end); | 
|---|
| 168 | return (start ^ end) & mask && (start & ~mask) == 0; | 
|---|
| 169 | } | 
|---|
| 170 |  | 
|---|
| 171 | static unsigned int gen8_pt_count(u64 start, u64 end) | 
|---|
| 172 | { | 
|---|
| 173 | GEM_BUG_ON(start >= end); | 
|---|
| 174 | if ((start ^ end) >> gen8_pd_shift(1)) | 
|---|
| 175 | return GEN8_PDES - (start & (GEN8_PDES - 1)); | 
|---|
| 176 | else | 
|---|
| 177 | return end - start; | 
|---|
| 178 | } | 
|---|
| 179 |  | 
|---|
| 180 | static unsigned int gen8_pd_top_count(const struct i915_address_space *vm) | 
|---|
| 181 | { | 
|---|
| 182 | unsigned int shift = __gen8_pte_shift(vm->top); | 
|---|
| 183 |  | 
|---|
| 184 | return (vm->total + (1ull << shift) - 1) >> shift; | 
|---|
| 185 | } | 
|---|
| 186 |  | 
|---|
| 187 | static struct i915_page_directory * | 
|---|
| 188 | gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) | 
|---|
| 189 | { | 
|---|
| 190 | struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); | 
|---|
| 191 |  | 
|---|
| 192 | if (vm->top == 2) | 
|---|
| 193 | return ppgtt->pd; | 
|---|
| 194 | else | 
|---|
| 195 | return i915_pd_entry(pdp: ppgtt->pd, gen8_pd_index(idx, vm->top)); | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|
| 198 | static struct i915_page_directory * | 
|---|
| 199 | gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr) | 
|---|
| 200 | { | 
|---|
| 201 | return gen8_pdp_for_page_index(vm, idx: addr >> GEN8_PTE_SHIFT); | 
|---|
| 202 | } | 
|---|
| 203 |  | 
|---|
| 204 | static void __gen8_ppgtt_cleanup(struct i915_address_space *vm, | 
|---|
| 205 | struct i915_page_directory *pd, | 
|---|
| 206 | int count, int lvl) | 
|---|
| 207 | { | 
|---|
| 208 | if (lvl) { | 
|---|
| 209 | void **pde = pd->entry; | 
|---|
| 210 |  | 
|---|
| 211 | do { | 
|---|
| 212 | if (!*pde) | 
|---|
| 213 | continue; | 
|---|
| 214 |  | 
|---|
| 215 | __gen8_ppgtt_cleanup(vm, pd: *pde, GEN8_PDES, lvl: lvl - 1); | 
|---|
| 216 | } while (pde++, --count); | 
|---|
| 217 | } | 
|---|
| 218 |  | 
|---|
| 219 | free_px(vm, pt: &pd->pt, lvl); | 
|---|
| 220 | } | 
|---|
| 221 |  | 
|---|
| 222 | static void gen8_ppgtt_cleanup(struct i915_address_space *vm) | 
|---|
| 223 | { | 
|---|
| 224 | struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); | 
|---|
| 225 |  | 
|---|
| 226 | if (vm->rsvd.obj) | 
|---|
| 227 | i915_gem_object_put(obj: vm->rsvd.obj); | 
|---|
| 228 |  | 
|---|
| 229 | if (intel_vgpu_active(i915: vm->i915)) | 
|---|
| 230 | gen8_ppgtt_notify_vgt(ppgtt, create: false); | 
|---|
| 231 |  | 
|---|
| 232 | if (ppgtt->pd) | 
|---|
| 233 | __gen8_ppgtt_cleanup(vm, pd: ppgtt->pd, | 
|---|
| 234 | count: gen8_pd_top_count(vm), lvl: vm->top); | 
|---|
| 235 |  | 
|---|
| 236 | free_scratch(vm); | 
|---|
| 237 | } | 
|---|
| 238 |  | 
|---|
| 239 | static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, | 
|---|
| 240 | struct i915_page_directory * const pd, | 
|---|
| 241 | u64 start, const u64 end, int lvl) | 
|---|
| 242 | { | 
|---|
| 243 | const struct drm_i915_gem_object * const scratch = vm->scratch[lvl]; | 
|---|
| 244 | unsigned int idx, len; | 
|---|
| 245 |  | 
|---|
| 246 | GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); | 
|---|
| 247 |  | 
|---|
| 248 | len = gen8_pd_range(start, end, lvl: lvl--, idx: &idx); | 
|---|
| 249 | GTT_TRACE( "%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", | 
|---|
| 250 | __func__, vm, lvl + 1, start, end, | 
|---|
| 251 | idx, len, atomic_read(px_used(pd))); | 
|---|
| 252 | GEM_BUG_ON(!len || len >= atomic_read(px_used(pd))); | 
|---|
| 253 |  | 
|---|
| 254 | do { | 
|---|
| 255 | struct i915_page_table *pt = pd->entry[idx]; | 
|---|
| 256 |  | 
|---|
| 257 | if (atomic_fetch_inc(v: &pt->used) >> gen8_pd_shift(1) && | 
|---|
| 258 | gen8_pd_contains(start, end, lvl)) { | 
|---|
| 259 | GTT_TRACE( "%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", | 
|---|
| 260 | __func__, vm, lvl + 1, idx, start, end); | 
|---|
| 261 | clear_pd_entry(pd, idx, scratch); | 
|---|
| 262 | __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); | 
|---|
| 263 | start += (u64)I915_PDES << gen8_pd_shift(lvl); | 
|---|
| 264 | continue; | 
|---|
| 265 | } | 
|---|
| 266 |  | 
|---|
| 267 | if (lvl) { | 
|---|
| 268 | start = __gen8_ppgtt_clear(vm, as_pd(pt), | 
|---|
| 269 | start, end, lvl); | 
|---|
| 270 | } else { | 
|---|
| 271 | unsigned int count; | 
|---|
| 272 | unsigned int pte = gen8_pd_index(start, 0); | 
|---|
| 273 | unsigned int num_ptes; | 
|---|
| 274 | u64 *vaddr; | 
|---|
| 275 |  | 
|---|
| 276 | count = gen8_pt_count(start, end); | 
|---|
| 277 | GTT_TRACE( "%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n", | 
|---|
| 278 | __func__, vm, lvl, start, end, | 
|---|
| 279 | gen8_pd_index(start, 0), count, | 
|---|
| 280 | atomic_read(&pt->used)); | 
|---|
| 281 | GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); | 
|---|
| 282 |  | 
|---|
| 283 | num_ptes = count; | 
|---|
| 284 | if (pt->is_compact) { | 
|---|
| 285 | GEM_BUG_ON(num_ptes % 16); | 
|---|
| 286 | GEM_BUG_ON(pte % 16); | 
|---|
| 287 | num_ptes /= 16; | 
|---|
| 288 | pte /= 16; | 
|---|
| 289 | } | 
|---|
| 290 |  | 
|---|
| 291 | vaddr = px_vaddr(pt); | 
|---|
| 292 | memset64(s: vaddr + pte, | 
|---|
| 293 | v: vm->scratch[0]->encode, | 
|---|
| 294 | n: num_ptes); | 
|---|
| 295 |  | 
|---|
| 296 | atomic_sub(i: count, v: &pt->used); | 
|---|
| 297 | start += count; | 
|---|
| 298 | } | 
|---|
| 299 |  | 
|---|
| 300 | if (release_pd_entry(pd, idx, pt, scratch)) | 
|---|
| 301 | free_px(vm, pt, lvl); | 
|---|
| 302 | } while (idx++, --len); | 
|---|
| 303 |  | 
|---|
| 304 | return start; | 
|---|
| 305 | } | 
|---|
| 306 |  | 
|---|
| 307 | static void gen8_ppgtt_clear(struct i915_address_space *vm, | 
|---|
| 308 | u64 start, u64 length) | 
|---|
| 309 | { | 
|---|
| 310 | GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); | 
|---|
| 311 | GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); | 
|---|
| 312 | GEM_BUG_ON(range_overflows(start, length, vm->total)); | 
|---|
| 313 |  | 
|---|
| 314 | start >>= GEN8_PTE_SHIFT; | 
|---|
| 315 | length >>= GEN8_PTE_SHIFT; | 
|---|
| 316 | GEM_BUG_ON(length == 0); | 
|---|
| 317 |  | 
|---|
| 318 | __gen8_ppgtt_clear(vm, pd: i915_vm_to_ppgtt(vm)->pd, | 
|---|
| 319 | start, end: start + length, lvl: vm->top); | 
|---|
| 320 | } | 
|---|
| 321 |  | 
|---|
| 322 | static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, | 
|---|
| 323 | struct i915_vm_pt_stash *stash, | 
|---|
| 324 | struct i915_page_directory * const pd, | 
|---|
| 325 | u64 * const start, const u64 end, int lvl) | 
|---|
| 326 | { | 
|---|
| 327 | unsigned int idx, len; | 
|---|
| 328 |  | 
|---|
| 329 | GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); | 
|---|
| 330 |  | 
|---|
| 331 | len = gen8_pd_range(start: *start, end, lvl: lvl--, idx: &idx); | 
|---|
| 332 | GTT_TRACE( "%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", | 
|---|
| 333 | __func__, vm, lvl + 1, *start, end, | 
|---|
| 334 | idx, len, atomic_read(px_used(pd))); | 
|---|
| 335 | GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1)); | 
|---|
| 336 |  | 
|---|
| 337 | spin_lock(lock: &pd->lock); | 
|---|
| 338 | GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */ | 
|---|
| 339 | do { | 
|---|
| 340 | struct i915_page_table *pt = pd->entry[idx]; | 
|---|
| 341 |  | 
|---|
| 342 | if (!pt) { | 
|---|
| 343 | spin_unlock(lock: &pd->lock); | 
|---|
| 344 |  | 
|---|
| 345 | GTT_TRACE( "%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", | 
|---|
| 346 | __func__, vm, lvl + 1, idx); | 
|---|
| 347 |  | 
|---|
| 348 | pt = stash->pt[!!lvl]; | 
|---|
| 349 | __i915_gem_object_pin_pages(obj: pt->base); | 
|---|
| 350 |  | 
|---|
| 351 | fill_px(pt, vm->scratch[lvl]->encode); | 
|---|
| 352 |  | 
|---|
| 353 | spin_lock(lock: &pd->lock); | 
|---|
| 354 | if (likely(!pd->entry[idx])) { | 
|---|
| 355 | stash->pt[!!lvl] = pt->stash; | 
|---|
| 356 | atomic_set(v: &pt->used, i: 0); | 
|---|
| 357 | set_pd_entry(pd, idx, pt); | 
|---|
| 358 | } else { | 
|---|
| 359 | pt = pd->entry[idx]; | 
|---|
| 360 | } | 
|---|
| 361 | } | 
|---|
| 362 |  | 
|---|
| 363 | if (lvl) { | 
|---|
| 364 | atomic_inc(v: &pt->used); | 
|---|
| 365 | spin_unlock(lock: &pd->lock); | 
|---|
| 366 |  | 
|---|
| 367 | __gen8_ppgtt_alloc(vm, stash, | 
|---|
| 368 | as_pd(pt), start, end, lvl); | 
|---|
| 369 |  | 
|---|
| 370 | spin_lock(lock: &pd->lock); | 
|---|
| 371 | atomic_dec(v: &pt->used); | 
|---|
| 372 | GEM_BUG_ON(!atomic_read(&pt->used)); | 
|---|
| 373 | } else { | 
|---|
| 374 | unsigned int count = gen8_pt_count(start: *start, end); | 
|---|
| 375 |  | 
|---|
| 376 | GTT_TRACE( "%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n", | 
|---|
| 377 | __func__, vm, lvl, *start, end, | 
|---|
| 378 | gen8_pd_index(*start, 0), count, | 
|---|
| 379 | atomic_read(&pt->used)); | 
|---|
| 380 |  | 
|---|
| 381 | atomic_add(i: count, v: &pt->used); | 
|---|
| 382 | /* All other pdes may be simultaneously removed */ | 
|---|
| 383 | GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES); | 
|---|
| 384 | *start += count; | 
|---|
| 385 | } | 
|---|
| 386 | } while (idx++, --len); | 
|---|
| 387 | spin_unlock(lock: &pd->lock); | 
|---|
| 388 | } | 
|---|
| 389 |  | 
|---|
| 390 | static void gen8_ppgtt_alloc(struct i915_address_space *vm, | 
|---|
| 391 | struct i915_vm_pt_stash *stash, | 
|---|
| 392 | u64 start, u64 length) | 
|---|
| 393 | { | 
|---|
| 394 | GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); | 
|---|
| 395 | GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); | 
|---|
| 396 | GEM_BUG_ON(range_overflows(start, length, vm->total)); | 
|---|
| 397 |  | 
|---|
| 398 | start >>= GEN8_PTE_SHIFT; | 
|---|
| 399 | length >>= GEN8_PTE_SHIFT; | 
|---|
| 400 | GEM_BUG_ON(length == 0); | 
|---|
| 401 |  | 
|---|
| 402 | __gen8_ppgtt_alloc(vm, stash, pd: i915_vm_to_ppgtt(vm)->pd, | 
|---|
| 403 | start: &start, end: start + length, lvl: vm->top); | 
|---|
| 404 | } | 
|---|
| 405 |  | 
|---|
| 406 | static void __gen8_ppgtt_foreach(struct i915_address_space *vm, | 
|---|
| 407 | struct i915_page_directory *pd, | 
|---|
| 408 | u64 *start, u64 end, int lvl, | 
|---|
| 409 | void (*fn)(struct i915_address_space *vm, | 
|---|
| 410 | struct i915_page_table *pt, | 
|---|
| 411 | void *data), | 
|---|
| 412 | void *data) | 
|---|
| 413 | { | 
|---|
| 414 | unsigned int idx, len; | 
|---|
| 415 |  | 
|---|
| 416 | len = gen8_pd_range(start: *start, end, lvl: lvl--, idx: &idx); | 
|---|
| 417 |  | 
|---|
| 418 | spin_lock(lock: &pd->lock); | 
|---|
| 419 | do { | 
|---|
| 420 | struct i915_page_table *pt = pd->entry[idx]; | 
|---|
| 421 |  | 
|---|
| 422 | atomic_inc(v: &pt->used); | 
|---|
| 423 | spin_unlock(lock: &pd->lock); | 
|---|
| 424 |  | 
|---|
| 425 | if (lvl) { | 
|---|
| 426 | __gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl, | 
|---|
| 427 | fn, data); | 
|---|
| 428 | } else { | 
|---|
| 429 | fn(vm, pt, data); | 
|---|
| 430 | *start += gen8_pt_count(start: *start, end); | 
|---|
| 431 | } | 
|---|
| 432 |  | 
|---|
| 433 | spin_lock(lock: &pd->lock); | 
|---|
| 434 | atomic_dec(v: &pt->used); | 
|---|
| 435 | } while (idx++, --len); | 
|---|
| 436 | spin_unlock(lock: &pd->lock); | 
|---|
| 437 | } | 
|---|
| 438 |  | 
|---|
| 439 | static void gen8_ppgtt_foreach(struct i915_address_space *vm, | 
|---|
| 440 | u64 start, u64 length, | 
|---|
| 441 | void (*fn)(struct i915_address_space *vm, | 
|---|
| 442 | struct i915_page_table *pt, | 
|---|
| 443 | void *data), | 
|---|
| 444 | void *data) | 
|---|
| 445 | { | 
|---|
| 446 | start >>= GEN8_PTE_SHIFT; | 
|---|
| 447 | length >>= GEN8_PTE_SHIFT; | 
|---|
| 448 |  | 
|---|
| 449 | __gen8_ppgtt_foreach(vm, pd: i915_vm_to_ppgtt(vm)->pd, | 
|---|
| 450 | start: &start, end: start + length, lvl: vm->top, | 
|---|
| 451 | fn, data); | 
|---|
| 452 | } | 
|---|
| 453 |  | 
|---|
| 454 | static __always_inline u64 | 
|---|
| 455 | gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, | 
|---|
| 456 | struct i915_page_directory *pdp, | 
|---|
| 457 | struct sgt_dma *iter, | 
|---|
| 458 | u64 idx, | 
|---|
| 459 | unsigned int pat_index, | 
|---|
| 460 | u32 flags) | 
|---|
| 461 | { | 
|---|
| 462 | struct i915_page_directory *pd; | 
|---|
| 463 | const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags); | 
|---|
| 464 | gen8_pte_t *vaddr; | 
|---|
| 465 |  | 
|---|
| 466 | pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); | 
|---|
| 467 | vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); | 
|---|
| 468 | do { | 
|---|
| 469 | GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE); | 
|---|
| 470 | vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; | 
|---|
| 471 |  | 
|---|
| 472 | iter->dma += I915_GTT_PAGE_SIZE; | 
|---|
| 473 | if (iter->dma >= iter->max) { | 
|---|
| 474 | iter->sg = __sg_next(sg: iter->sg); | 
|---|
| 475 | if (!iter->sg || sg_dma_len(iter->sg) == 0) { | 
|---|
| 476 | idx = 0; | 
|---|
| 477 | break; | 
|---|
| 478 | } | 
|---|
| 479 |  | 
|---|
| 480 | iter->dma = sg_dma_address(iter->sg); | 
|---|
| 481 | iter->max = iter->dma + sg_dma_len(iter->sg); | 
|---|
| 482 | } | 
|---|
| 483 |  | 
|---|
| 484 | if (gen8_pd_index(++idx, 0) == 0) { | 
|---|
| 485 | if (gen8_pd_index(idx, 1) == 0) { | 
|---|
| 486 | /* Limited by sg length for 3lvl */ | 
|---|
| 487 | if (gen8_pd_index(idx, 2) == 0) | 
|---|
| 488 | break; | 
|---|
| 489 |  | 
|---|
| 490 | pd = pdp->entry[gen8_pd_index(idx, 2)]; | 
|---|
| 491 | } | 
|---|
| 492 |  | 
|---|
| 493 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); | 
|---|
| 494 | vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); | 
|---|
| 495 | } | 
|---|
| 496 | } while (1); | 
|---|
| 497 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); | 
|---|
| 498 |  | 
|---|
| 499 | return idx; | 
|---|
| 500 | } | 
|---|
| 501 |  | 
|---|
| 502 | static void | 
|---|
| 503 | xehp_ppgtt_insert_huge(struct i915_address_space *vm, | 
|---|
| 504 | struct i915_vma_resource *vma_res, | 
|---|
| 505 | struct sgt_dma *iter, | 
|---|
| 506 | unsigned int pat_index, | 
|---|
| 507 | u32 flags) | 
|---|
| 508 | { | 
|---|
| 509 | const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); | 
|---|
| 510 | unsigned int rem = sg_dma_len(iter->sg); | 
|---|
| 511 | u64 start = vma_res->start; | 
|---|
| 512 | u64 end = start + vma_res->vma_size; | 
|---|
| 513 |  | 
|---|
| 514 | GEM_BUG_ON(!i915_vm_is_4lvl(vm)); | 
|---|
| 515 |  | 
|---|
| 516 | do { | 
|---|
| 517 | struct i915_page_directory * const pdp = | 
|---|
| 518 | gen8_pdp_for_page_address(vm, addr: start); | 
|---|
| 519 | struct i915_page_directory * const pd = | 
|---|
| 520 | i915_pd_entry(pdp, __gen8_pte_index(start, 2)); | 
|---|
| 521 | struct i915_page_table *pt = | 
|---|
| 522 | i915_pt_entry(pd, __gen8_pte_index(start, 1)); | 
|---|
| 523 | gen8_pte_t encode = pte_encode; | 
|---|
| 524 | unsigned int page_size; | 
|---|
| 525 | gen8_pte_t *vaddr; | 
|---|
| 526 | u16 index, max, nent, i; | 
|---|
| 527 |  | 
|---|
| 528 | max = I915_PDES; | 
|---|
| 529 | nent = 1; | 
|---|
| 530 |  | 
|---|
| 531 | if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M && | 
|---|
| 532 | IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && | 
|---|
| 533 | rem >= I915_GTT_PAGE_SIZE_2M && | 
|---|
| 534 | !__gen8_pte_index(start, 0)) { | 
|---|
| 535 | index = __gen8_pte_index(start, 1); | 
|---|
| 536 | encode |= GEN8_PDE_PS_2M; | 
|---|
| 537 | page_size = I915_GTT_PAGE_SIZE_2M; | 
|---|
| 538 |  | 
|---|
| 539 | vaddr = px_vaddr(pd); | 
|---|
| 540 | } else { | 
|---|
| 541 | index =  __gen8_pte_index(start, 0); | 
|---|
| 542 | page_size = I915_GTT_PAGE_SIZE; | 
|---|
| 543 |  | 
|---|
| 544 | if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { | 
|---|
| 545 | /* | 
|---|
| 546 | * Device local-memory on these platforms should | 
|---|
| 547 | * always use 64K pages or larger (including GTT | 
|---|
| 548 | * alignment), therefore if we know the whole | 
|---|
| 549 | * page-table needs to be filled we can always | 
|---|
| 550 | * safely use the compact-layout. Otherwise fall | 
|---|
| 551 | * back to the TLB hint with PS64. If this is | 
|---|
| 552 | * system memory we only bother with PS64. | 
|---|
| 553 | */ | 
|---|
| 554 | if ((encode & GEN12_PPGTT_PTE_LM) && | 
|---|
| 555 | end - start >= SZ_2M && !index) { | 
|---|
| 556 | index = __gen8_pte_index(start, 0) / 16; | 
|---|
| 557 | page_size = I915_GTT_PAGE_SIZE_64K; | 
|---|
| 558 |  | 
|---|
| 559 | max /= 16; | 
|---|
| 560 |  | 
|---|
| 561 | vaddr = px_vaddr(pd); | 
|---|
| 562 | vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K; | 
|---|
| 563 |  | 
|---|
| 564 | pt->is_compact = true; | 
|---|
| 565 | } else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && | 
|---|
| 566 | rem >= I915_GTT_PAGE_SIZE_64K && | 
|---|
| 567 | !(index % 16)) { | 
|---|
| 568 | encode |= GEN12_PTE_PS64; | 
|---|
| 569 | page_size = I915_GTT_PAGE_SIZE_64K; | 
|---|
| 570 | nent = 16; | 
|---|
| 571 | } | 
|---|
| 572 | } | 
|---|
| 573 |  | 
|---|
| 574 | vaddr = px_vaddr(pt); | 
|---|
| 575 | } | 
|---|
| 576 |  | 
|---|
| 577 | do { | 
|---|
| 578 | GEM_BUG_ON(rem < page_size); | 
|---|
| 579 |  | 
|---|
| 580 | for (i = 0; i < nent; i++) { | 
|---|
| 581 | vaddr[index++] = | 
|---|
| 582 | encode | (iter->dma + i * | 
|---|
| 583 | I915_GTT_PAGE_SIZE); | 
|---|
| 584 | } | 
|---|
| 585 |  | 
|---|
| 586 | start += page_size; | 
|---|
| 587 | iter->dma += page_size; | 
|---|
| 588 | rem -= page_size; | 
|---|
| 589 | if (iter->dma >= iter->max) { | 
|---|
| 590 | iter->sg = __sg_next(sg: iter->sg); | 
|---|
| 591 | if (!iter->sg) | 
|---|
| 592 | break; | 
|---|
| 593 |  | 
|---|
| 594 | rem = sg_dma_len(iter->sg); | 
|---|
| 595 | if (!rem) | 
|---|
| 596 | break; | 
|---|
| 597 |  | 
|---|
| 598 | iter->dma = sg_dma_address(iter->sg); | 
|---|
| 599 | iter->max = iter->dma + rem; | 
|---|
| 600 |  | 
|---|
| 601 | if (unlikely(!IS_ALIGNED(iter->dma, page_size))) | 
|---|
| 602 | break; | 
|---|
| 603 | } | 
|---|
| 604 | } while (rem >= page_size && index < max); | 
|---|
| 605 |  | 
|---|
| 606 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); | 
|---|
| 607 | vma_res->page_sizes_gtt |= page_size; | 
|---|
| 608 | } while (iter->sg && sg_dma_len(iter->sg)); | 
|---|
| 609 | } | 
|---|
| 610 |  | 
|---|
| 611 | static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, | 
|---|
| 612 | struct i915_vma_resource *vma_res, | 
|---|
| 613 | struct sgt_dma *iter, | 
|---|
| 614 | unsigned int pat_index, | 
|---|
| 615 | u32 flags) | 
|---|
| 616 | { | 
|---|
| 617 | const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); | 
|---|
| 618 | unsigned int rem = sg_dma_len(iter->sg); | 
|---|
| 619 | u64 start = vma_res->start; | 
|---|
| 620 |  | 
|---|
| 621 | GEM_BUG_ON(!i915_vm_is_4lvl(vm)); | 
|---|
| 622 |  | 
|---|
| 623 | do { | 
|---|
| 624 | struct i915_page_directory * const pdp = | 
|---|
| 625 | gen8_pdp_for_page_address(vm, addr: start); | 
|---|
| 626 | struct i915_page_directory * const pd = | 
|---|
| 627 | i915_pd_entry(pdp, __gen8_pte_index(start, 2)); | 
|---|
| 628 | gen8_pte_t encode = pte_encode; | 
|---|
| 629 | unsigned int maybe_64K = -1; | 
|---|
| 630 | unsigned int page_size; | 
|---|
| 631 | gen8_pte_t *vaddr; | 
|---|
| 632 | u16 index; | 
|---|
| 633 |  | 
|---|
| 634 | if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M && | 
|---|
| 635 | IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && | 
|---|
| 636 | rem >= I915_GTT_PAGE_SIZE_2M && | 
|---|
| 637 | !__gen8_pte_index(start, 0)) { | 
|---|
| 638 | index = __gen8_pte_index(start, 1); | 
|---|
| 639 | encode |= GEN8_PDE_PS_2M; | 
|---|
| 640 | page_size = I915_GTT_PAGE_SIZE_2M; | 
|---|
| 641 |  | 
|---|
| 642 | vaddr = px_vaddr(pd); | 
|---|
| 643 | } else { | 
|---|
| 644 | struct i915_page_table *pt = | 
|---|
| 645 | i915_pt_entry(pd, __gen8_pte_index(start, 1)); | 
|---|
| 646 |  | 
|---|
| 647 | index = __gen8_pte_index(start, 0); | 
|---|
| 648 | page_size = I915_GTT_PAGE_SIZE; | 
|---|
| 649 |  | 
|---|
| 650 | if (!index && | 
|---|
| 651 | vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K && | 
|---|
| 652 | IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && | 
|---|
| 653 | (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || | 
|---|
| 654 | rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)) | 
|---|
| 655 | maybe_64K = __gen8_pte_index(start, 1); | 
|---|
| 656 |  | 
|---|
| 657 | vaddr = px_vaddr(pt); | 
|---|
| 658 | } | 
|---|
| 659 |  | 
|---|
| 660 | do { | 
|---|
| 661 | GEM_BUG_ON(sg_dma_len(iter->sg) < page_size); | 
|---|
| 662 | vaddr[index++] = encode | iter->dma; | 
|---|
| 663 |  | 
|---|
| 664 | start += page_size; | 
|---|
| 665 | iter->dma += page_size; | 
|---|
| 666 | rem -= page_size; | 
|---|
| 667 | if (iter->dma >= iter->max) { | 
|---|
| 668 | iter->sg = __sg_next(sg: iter->sg); | 
|---|
| 669 | if (!iter->sg) | 
|---|
| 670 | break; | 
|---|
| 671 |  | 
|---|
| 672 | rem = sg_dma_len(iter->sg); | 
|---|
| 673 | if (!rem) | 
|---|
| 674 | break; | 
|---|
| 675 |  | 
|---|
| 676 | iter->dma = sg_dma_address(iter->sg); | 
|---|
| 677 | iter->max = iter->dma + rem; | 
|---|
| 678 |  | 
|---|
| 679 | if (maybe_64K != -1 && index < I915_PDES && | 
|---|
| 680 | !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && | 
|---|
| 681 | (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || | 
|---|
| 682 | rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))) | 
|---|
| 683 | maybe_64K = -1; | 
|---|
| 684 |  | 
|---|
| 685 | if (unlikely(!IS_ALIGNED(iter->dma, page_size))) | 
|---|
| 686 | break; | 
|---|
| 687 | } | 
|---|
| 688 | } while (rem >= page_size && index < I915_PDES); | 
|---|
| 689 |  | 
|---|
| 690 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); | 
|---|
| 691 |  | 
|---|
| 692 | /* | 
|---|
| 693 | * Is it safe to mark the 2M block as 64K? -- Either we have | 
|---|
| 694 | * filled whole page-table with 64K entries, or filled part of | 
|---|
| 695 | * it and have reached the end of the sg table and we have | 
|---|
| 696 | * enough padding. | 
|---|
| 697 | */ | 
|---|
| 698 | if (maybe_64K != -1 && | 
|---|
| 699 | (index == I915_PDES || | 
|---|
| 700 | (i915_vm_has_scratch_64K(vm) && | 
|---|
| 701 | !iter->sg && IS_ALIGNED(vma_res->start + | 
|---|
| 702 | vma_res->node_size, | 
|---|
| 703 | I915_GTT_PAGE_SIZE_2M)))) { | 
|---|
| 704 | vaddr = px_vaddr(pd); | 
|---|
| 705 | vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; | 
|---|
| 706 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); | 
|---|
| 707 | page_size = I915_GTT_PAGE_SIZE_64K; | 
|---|
| 708 |  | 
|---|
| 709 | /* | 
|---|
| 710 | * We write all 4K page entries, even when using 64K | 
|---|
| 711 | * pages. In order to verify that the HW isn't cheating | 
|---|
| 712 | * by using the 4K PTE instead of the 64K PTE, we want | 
|---|
| 713 | * to remove all the surplus entries. If the HW skipped | 
|---|
| 714 | * the 64K PTE, it will read/write into the scratch page | 
|---|
| 715 | * instead - which we detect as missing results during | 
|---|
| 716 | * selftests. | 
|---|
| 717 | */ | 
|---|
| 718 | if (I915_SELFTEST_ONLY(vm->scrub_64K)) { | 
|---|
| 719 | u16 i; | 
|---|
| 720 |  | 
|---|
| 721 | encode = vm->scratch[0]->encode; | 
|---|
| 722 | vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K)); | 
|---|
| 723 |  | 
|---|
| 724 | for (i = 1; i < index; i += 16) | 
|---|
| 725 | memset64(s: vaddr + i, v: encode, n: 15); | 
|---|
| 726 |  | 
|---|
| 727 | drm_clflush_virt_range(addr: vaddr, PAGE_SIZE); | 
|---|
| 728 | } | 
|---|
| 729 | } | 
|---|
| 730 |  | 
|---|
| 731 | vma_res->page_sizes_gtt |= page_size; | 
|---|
| 732 | } while (iter->sg && sg_dma_len(iter->sg)); | 
|---|
| 733 | } | 
|---|
| 734 |  | 
|---|
| 735 | static void gen8_ppgtt_insert(struct i915_address_space *vm, | 
|---|
| 736 | struct i915_vma_resource *vma_res, | 
|---|
| 737 | unsigned int pat_index, | 
|---|
| 738 | u32 flags) | 
|---|
| 739 | { | 
|---|
| 740 | struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); | 
|---|
| 741 | struct sgt_dma iter = sgt_dma(vma_res); | 
|---|
| 742 |  | 
|---|
| 743 | if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) { | 
|---|
| 744 | if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 55)) | 
|---|
| 745 | xehp_ppgtt_insert_huge(vm, vma_res, iter: &iter, pat_index, flags); | 
|---|
| 746 | else | 
|---|
| 747 | gen8_ppgtt_insert_huge(vm, vma_res, iter: &iter, pat_index, flags); | 
|---|
| 748 | } else  { | 
|---|
| 749 | u64 idx = vma_res->start >> GEN8_PTE_SHIFT; | 
|---|
| 750 |  | 
|---|
| 751 | do { | 
|---|
| 752 | struct i915_page_directory * const pdp = | 
|---|
| 753 | gen8_pdp_for_page_index(vm, idx); | 
|---|
| 754 |  | 
|---|
| 755 | idx = gen8_ppgtt_insert_pte(ppgtt, pdp, iter: &iter, idx, | 
|---|
| 756 | pat_index, flags); | 
|---|
| 757 | } while (idx); | 
|---|
| 758 |  | 
|---|
| 759 | vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; | 
|---|
| 760 | } | 
|---|
| 761 | } | 
|---|
| 762 |  | 
|---|
| 763 | static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, | 
|---|
| 764 | dma_addr_t addr, | 
|---|
| 765 | u64 offset, | 
|---|
| 766 | unsigned int pat_index, | 
|---|
| 767 | u32 flags) | 
|---|
| 768 | { | 
|---|
| 769 | u64 idx = offset >> GEN8_PTE_SHIFT; | 
|---|
| 770 | struct i915_page_directory * const pdp = | 
|---|
| 771 | gen8_pdp_for_page_index(vm, idx); | 
|---|
| 772 | struct i915_page_directory *pd = | 
|---|
| 773 | i915_pd_entry(pdp, gen8_pd_index(idx, 2)); | 
|---|
| 774 | struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1)); | 
|---|
| 775 | gen8_pte_t *vaddr; | 
|---|
| 776 |  | 
|---|
| 777 | GEM_BUG_ON(pt->is_compact); | 
|---|
| 778 |  | 
|---|
| 779 | vaddr = px_vaddr(pt); | 
|---|
| 780 | vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags); | 
|---|
| 781 | drm_clflush_virt_range(addr: &vaddr[gen8_pd_index(idx, 0)], length: sizeof(*vaddr)); | 
|---|
| 782 | } | 
|---|
| 783 |  | 
|---|
| 784 | static void xehp_ppgtt_insert_entry_lm(struct i915_address_space *vm, | 
|---|
| 785 | dma_addr_t addr, | 
|---|
| 786 | u64 offset, | 
|---|
| 787 | unsigned int pat_index, | 
|---|
| 788 | u32 flags) | 
|---|
| 789 | { | 
|---|
| 790 | u64 idx = offset >> GEN8_PTE_SHIFT; | 
|---|
| 791 | struct i915_page_directory * const pdp = | 
|---|
| 792 | gen8_pdp_for_page_index(vm, idx); | 
|---|
| 793 | struct i915_page_directory *pd = | 
|---|
| 794 | i915_pd_entry(pdp, gen8_pd_index(idx, 2)); | 
|---|
| 795 | struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1)); | 
|---|
| 796 | gen8_pte_t *vaddr; | 
|---|
| 797 |  | 
|---|
| 798 | GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K)); | 
|---|
| 799 | GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K)); | 
|---|
| 800 |  | 
|---|
| 801 | /* XXX: we don't strictly need to use this layout */ | 
|---|
| 802 |  | 
|---|
| 803 | if (!pt->is_compact) { | 
|---|
| 804 | vaddr = px_vaddr(pd); | 
|---|
| 805 | vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K; | 
|---|
| 806 | pt->is_compact = true; | 
|---|
| 807 | } | 
|---|
| 808 |  | 
|---|
| 809 | vaddr = px_vaddr(pt); | 
|---|
| 810 | vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags); | 
|---|
| 811 | } | 
|---|
| 812 |  | 
|---|
| 813 | static void xehp_ppgtt_insert_entry(struct i915_address_space *vm, | 
|---|
| 814 | dma_addr_t addr, | 
|---|
| 815 | u64 offset, | 
|---|
| 816 | unsigned int pat_index, | 
|---|
| 817 | u32 flags) | 
|---|
| 818 | { | 
|---|
| 819 | if (flags & PTE_LM) | 
|---|
| 820 | return xehp_ppgtt_insert_entry_lm(vm, addr, offset, | 
|---|
| 821 | pat_index, flags); | 
|---|
| 822 |  | 
|---|
| 823 | return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags); | 
|---|
| 824 | } | 
|---|
| 825 |  | 
|---|
| 826 | static int gen8_init_scratch(struct i915_address_space *vm) | 
|---|
| 827 | { | 
|---|
| 828 | u32 pte_flags; | 
|---|
| 829 | int ret; | 
|---|
| 830 | int i; | 
|---|
| 831 |  | 
|---|
| 832 | /* | 
|---|
| 833 | * If everybody agrees to not to write into the scratch page, | 
|---|
| 834 | * we can reuse it for all vm, keeping contexts and processes separate. | 
|---|
| 835 | */ | 
|---|
| 836 | if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) { | 
|---|
| 837 | struct i915_address_space *clone = vm->gt->vm; | 
|---|
| 838 |  | 
|---|
| 839 | GEM_BUG_ON(!clone->has_read_only); | 
|---|
| 840 |  | 
|---|
| 841 | vm->scratch_order = clone->scratch_order; | 
|---|
| 842 | for (i = 0; i <= vm->top; i++) | 
|---|
| 843 | vm->scratch[i] = i915_gem_object_get(obj: clone->scratch[i]); | 
|---|
| 844 |  | 
|---|
| 845 | return 0; | 
|---|
| 846 | } | 
|---|
| 847 |  | 
|---|
| 848 | ret = setup_scratch_page(vm); | 
|---|
| 849 | if (ret) | 
|---|
| 850 | return ret; | 
|---|
| 851 |  | 
|---|
| 852 | pte_flags = vm->has_read_only; | 
|---|
| 853 | if (i915_gem_object_is_lmem(obj: vm->scratch[0])) | 
|---|
| 854 | pte_flags |= PTE_LM; | 
|---|
| 855 |  | 
|---|
| 856 | vm->scratch[0]->encode = | 
|---|
| 857 | vm->pte_encode(px_dma(vm->scratch[0]), | 
|---|
| 858 | i915_gem_get_pat_index(i915: vm->i915, | 
|---|
| 859 | level: I915_CACHE_NONE), | 
|---|
| 860 | pte_flags); | 
|---|
| 861 |  | 
|---|
| 862 | for (i = 1; i <= vm->top; i++) { | 
|---|
| 863 | struct drm_i915_gem_object *obj; | 
|---|
| 864 |  | 
|---|
| 865 | obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); | 
|---|
| 866 | if (IS_ERR(ptr: obj)) { | 
|---|
| 867 | ret = PTR_ERR(ptr: obj); | 
|---|
| 868 | goto free_scratch; | 
|---|
| 869 | } | 
|---|
| 870 |  | 
|---|
| 871 | ret = map_pt_dma(vm, obj); | 
|---|
| 872 | if (ret) { | 
|---|
| 873 | i915_gem_object_put(obj); | 
|---|
| 874 | goto free_scratch; | 
|---|
| 875 | } | 
|---|
| 876 |  | 
|---|
| 877 | fill_px(obj, vm->scratch[i - 1]->encode); | 
|---|
| 878 | obj->encode = gen8_pde_encode(px_dma(obj), level: I915_CACHE_NONE); | 
|---|
| 879 |  | 
|---|
| 880 | vm->scratch[i] = obj; | 
|---|
| 881 | } | 
|---|
| 882 |  | 
|---|
| 883 | return 0; | 
|---|
| 884 |  | 
|---|
| 885 | free_scratch: | 
|---|
| 886 | while (i--) | 
|---|
| 887 | i915_gem_object_put(obj: vm->scratch[i]); | 
|---|
| 888 | vm->scratch[0] = NULL; | 
|---|
| 889 | return ret; | 
|---|
| 890 | } | 
|---|
| 891 |  | 
|---|
| 892 | static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) | 
|---|
| 893 | { | 
|---|
| 894 | struct i915_address_space *vm = &ppgtt->vm; | 
|---|
| 895 | struct i915_page_directory *pd = ppgtt->pd; | 
|---|
| 896 | unsigned int idx; | 
|---|
| 897 |  | 
|---|
| 898 | GEM_BUG_ON(vm->top != 2); | 
|---|
| 899 | GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES); | 
|---|
| 900 |  | 
|---|
| 901 | for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) { | 
|---|
| 902 | struct i915_page_directory *pde; | 
|---|
| 903 | int err; | 
|---|
| 904 |  | 
|---|
| 905 | pde = alloc_pd(vm); | 
|---|
| 906 | if (IS_ERR(ptr: pde)) | 
|---|
| 907 | return PTR_ERR(ptr: pde); | 
|---|
| 908 |  | 
|---|
| 909 | err = map_pt_dma(vm, obj: pde->pt.base); | 
|---|
| 910 | if (err) { | 
|---|
| 911 | free_pd(vm, pde); | 
|---|
| 912 | return err; | 
|---|
| 913 | } | 
|---|
| 914 |  | 
|---|
| 915 | fill_px(pde, vm->scratch[1]->encode); | 
|---|
| 916 | set_pd_entry(pd, idx, pde); | 
|---|
| 917 | atomic_inc(px_used(pde)); /* keep pinned */ | 
|---|
| 918 | } | 
|---|
| 919 | wmb(); | 
|---|
| 920 |  | 
|---|
| 921 | return 0; | 
|---|
| 922 | } | 
|---|
| 923 |  | 
|---|
| 924 | static struct i915_page_directory * | 
|---|
| 925 | gen8_alloc_top_pd(struct i915_address_space *vm) | 
|---|
| 926 | { | 
|---|
| 927 | const unsigned int count = gen8_pd_top_count(vm); | 
|---|
| 928 | struct i915_page_directory *pd; | 
|---|
| 929 | int err; | 
|---|
| 930 |  | 
|---|
| 931 | GEM_BUG_ON(count > I915_PDES); | 
|---|
| 932 |  | 
|---|
| 933 | pd = __alloc_pd(npde: count); | 
|---|
| 934 | if (unlikely(!pd)) | 
|---|
| 935 | return ERR_PTR(error: -ENOMEM); | 
|---|
| 936 |  | 
|---|
| 937 | pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); | 
|---|
| 938 | if (IS_ERR(ptr: pd->pt.base)) { | 
|---|
| 939 | err = PTR_ERR(ptr: pd->pt.base); | 
|---|
| 940 | pd->pt.base = NULL; | 
|---|
| 941 | goto err_pd; | 
|---|
| 942 | } | 
|---|
| 943 |  | 
|---|
| 944 | err = map_pt_dma(vm, obj: pd->pt.base); | 
|---|
| 945 | if (err) | 
|---|
| 946 | goto err_pd; | 
|---|
| 947 |  | 
|---|
| 948 | fill_page_dma(px_base(pd), val: vm->scratch[vm->top]->encode, count); | 
|---|
| 949 | atomic_inc(px_used(pd)); /* mark as pinned */ | 
|---|
| 950 | return pd; | 
|---|
| 951 |  | 
|---|
| 952 | err_pd: | 
|---|
| 953 | free_pd(vm, pd); | 
|---|
| 954 | return ERR_PTR(error: err); | 
|---|
| 955 | } | 
|---|
| 956 |  | 
|---|
| 957 | static int gen8_init_rsvd(struct i915_address_space *vm) | 
|---|
| 958 | { | 
|---|
| 959 | struct drm_i915_private *i915 = vm->i915; | 
|---|
| 960 | struct drm_i915_gem_object *obj; | 
|---|
| 961 | struct i915_vma *vma; | 
|---|
| 962 | int ret; | 
|---|
| 963 |  | 
|---|
| 964 | if (!intel_gt_needs_wa_16018031267(gt: vm->gt)) | 
|---|
| 965 | return 0; | 
|---|
| 966 |  | 
|---|
| 967 | /* The memory will be used only by GPU. */ | 
|---|
| 968 | obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, | 
|---|
| 969 | I915_BO_ALLOC_VOLATILE | | 
|---|
| 970 | I915_BO_ALLOC_GPU_ONLY); | 
|---|
| 971 | if (IS_ERR(ptr: obj)) | 
|---|
| 972 | obj = i915_gem_object_create_internal(i915, PAGE_SIZE); | 
|---|
| 973 | if (IS_ERR(ptr: obj)) | 
|---|
| 974 | return PTR_ERR(ptr: obj); | 
|---|
| 975 |  | 
|---|
| 976 | vma = i915_vma_instance(obj, vm, NULL); | 
|---|
| 977 | if (IS_ERR(ptr: vma)) { | 
|---|
| 978 | ret = PTR_ERR(ptr: vma); | 
|---|
| 979 | goto unref; | 
|---|
| 980 | } | 
|---|
| 981 |  | 
|---|
| 982 | ret = i915_vma_pin(vma, size: 0, alignment: 0, PIN_USER | PIN_HIGH); | 
|---|
| 983 | if (ret) | 
|---|
| 984 | goto unref; | 
|---|
| 985 |  | 
|---|
| 986 | vm->rsvd.vma = i915_vma_make_unshrinkable(vma); | 
|---|
| 987 | vm->rsvd.obj = obj; | 
|---|
| 988 | vm->total -= vma->node.size; | 
|---|
| 989 | return 0; | 
|---|
| 990 | unref: | 
|---|
| 991 | i915_gem_object_put(obj); | 
|---|
| 992 | return ret; | 
|---|
| 993 | } | 
|---|
| 994 |  | 
|---|
| 995 | /* | 
|---|
| 996 | * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers | 
|---|
| 997 | * with a net effect resembling a 2-level page table in normal x86 terms. Each | 
|---|
| 998 | * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address | 
|---|
| 999 | * space. | 
|---|
| 1000 | * | 
|---|
| 1001 | */ | 
|---|
| 1002 | struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, | 
|---|
| 1003 | unsigned long lmem_pt_obj_flags) | 
|---|
| 1004 | { | 
|---|
| 1005 | struct i915_page_directory *pd; | 
|---|
| 1006 | struct i915_ppgtt *ppgtt; | 
|---|
| 1007 | int err; | 
|---|
| 1008 |  | 
|---|
| 1009 | ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); | 
|---|
| 1010 | if (!ppgtt) | 
|---|
| 1011 | return ERR_PTR(error: -ENOMEM); | 
|---|
| 1012 |  | 
|---|
| 1013 | ppgtt_init(ppgtt, gt, lmem_pt_obj_flags); | 
|---|
| 1014 | ppgtt->vm.top = i915_vm_is_4lvl(vm: &ppgtt->vm) ? 3 : 2; | 
|---|
| 1015 | ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t)); | 
|---|
| 1016 |  | 
|---|
| 1017 | /* | 
|---|
| 1018 | * From bdw, there is hw support for read-only pages in the PPGTT. | 
|---|
| 1019 | * | 
|---|
| 1020 | * Gen11 has HSDES#:1807136187 unresolved. Disable ro support | 
|---|
| 1021 | * for now. | 
|---|
| 1022 | * | 
|---|
| 1023 | * Gen12 has inherited the same read-only fault issue from gen11. | 
|---|
| 1024 | */ | 
|---|
| 1025 | ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12); | 
|---|
| 1026 |  | 
|---|
| 1027 | if (HAS_LMEM(gt->i915)) | 
|---|
| 1028 | ppgtt->vm.alloc_pt_dma = alloc_pt_lmem; | 
|---|
| 1029 | else | 
|---|
| 1030 | ppgtt->vm.alloc_pt_dma = alloc_pt_dma; | 
|---|
| 1031 |  | 
|---|
| 1032 | /* | 
|---|
| 1033 | * Using SMEM here instead of LMEM has the advantage of not reserving | 
|---|
| 1034 | * high performance memory for a "never" used filler page. It also | 
|---|
| 1035 | * removes the device access that would be required to initialise the | 
|---|
| 1036 | * scratch page, reducing pressure on an even scarcer resource. | 
|---|
| 1037 | */ | 
|---|
| 1038 | ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; | 
|---|
| 1039 |  | 
|---|
| 1040 | if (GRAPHICS_VER(gt->i915) >= 12) | 
|---|
| 1041 | ppgtt->vm.pte_encode = gen12_pte_encode; | 
|---|
| 1042 | else | 
|---|
| 1043 | ppgtt->vm.pte_encode = gen8_pte_encode; | 
|---|
| 1044 |  | 
|---|
| 1045 | ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; | 
|---|
| 1046 | ppgtt->vm.insert_entries = gen8_ppgtt_insert; | 
|---|
| 1047 | if (HAS_64K_PAGES(gt->i915)) | 
|---|
| 1048 | ppgtt->vm.insert_page = xehp_ppgtt_insert_entry; | 
|---|
| 1049 | else | 
|---|
| 1050 | ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; | 
|---|
| 1051 | ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; | 
|---|
| 1052 | ppgtt->vm.clear_range = gen8_ppgtt_clear; | 
|---|
| 1053 | ppgtt->vm.foreach = gen8_ppgtt_foreach; | 
|---|
| 1054 | ppgtt->vm.cleanup = gen8_ppgtt_cleanup; | 
|---|
| 1055 |  | 
|---|
| 1056 | err = gen8_init_scratch(vm: &ppgtt->vm); | 
|---|
| 1057 | if (err) | 
|---|
| 1058 | goto err_put; | 
|---|
| 1059 |  | 
|---|
| 1060 | pd = gen8_alloc_top_pd(vm: &ppgtt->vm); | 
|---|
| 1061 | if (IS_ERR(ptr: pd)) { | 
|---|
| 1062 | err = PTR_ERR(ptr: pd); | 
|---|
| 1063 | goto err_put; | 
|---|
| 1064 | } | 
|---|
| 1065 | ppgtt->pd = pd; | 
|---|
| 1066 |  | 
|---|
| 1067 | if (!i915_vm_is_4lvl(vm: &ppgtt->vm)) { | 
|---|
| 1068 | err = gen8_preallocate_top_level_pdp(ppgtt); | 
|---|
| 1069 | if (err) | 
|---|
| 1070 | goto err_put; | 
|---|
| 1071 | } | 
|---|
| 1072 |  | 
|---|
| 1073 | if (intel_vgpu_active(i915: gt->i915)) | 
|---|
| 1074 | gen8_ppgtt_notify_vgt(ppgtt, create: true); | 
|---|
| 1075 |  | 
|---|
| 1076 | err = gen8_init_rsvd(vm: &ppgtt->vm); | 
|---|
| 1077 | if (err) | 
|---|
| 1078 | goto err_put; | 
|---|
| 1079 |  | 
|---|
| 1080 | return ppgtt; | 
|---|
| 1081 |  | 
|---|
| 1082 | err_put: | 
|---|
| 1083 | i915_vm_put(vm: &ppgtt->vm); | 
|---|
| 1084 | return ERR_PTR(error: err); | 
|---|
| 1085 | } | 
|---|
| 1086 |  | 
|---|