// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/sched/mm.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "i915_reg.h"
#include "i915_trace.h"
#include "i915_utils.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_gtt.h"

bool i915_ggtt_require_binder(struct drm_i915_private *i915)
{
	/* Wa_13010847436 & Wa_14019519902 */
	return !i915_direct_stolen_access(i915) &&
	       MEDIA_VER_FULL(i915) == IP_VER(13, 0);
}

static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
	return IS_BROXTON(i915) && i915_vtd_active(i915);
}

bool intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
{
	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
}

struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * used the passed in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
						    vm->lmem_pt_obj_flags);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}

struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}

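/*
 * Pin and kernel-map a paging-structure object. The map type is chosen
 * for GT coherency (with a temporary WC override on MTL, see the FIXME
 * below), and the object is made unshrinkable so that the shrinker can
 * never unmap a live page table.
 */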
int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

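/*
 * As map_pt_dma(), but for callers that already hold the object lock:
 * uses i915_gem_object_pin_map() instead of the _unlocked variant.
 */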
int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

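/*
 * Drain a vm's bound/unbound vma list. Each vma whose object is still
 * alive is destroyed immediately; vmas belonging to dying objects are
 * merely unbound here, with final destruction (and freeing of the vm
 * itself) deferred to the object destructor.
 */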
static void clear_vm_list(struct list_head *list)
{
	struct i915_vma *vma, *vn;

	list_for_each_entry_safe(vma, vn, list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		if (!i915_gem_object_get_rcu(obj)) {
			/*
			 * Object is dying, but has not yet cleared its
			 * vma list.
			 * Unbind the dying vma to ensure our list
			 * is completely drained. We leave the destruction to
			 * the object destructor to avoid the vma
			 * disappearing under it.
			 */
			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
			WARN_ON(__i915_vma_unbind(vma));

			/* Remove from the unbound list */
			list_del_init(&vma->vm_link);

			/*
			 * Delay the vm and vm mutex freeing until the
			 * object is done with destruction.
			 */
			i915_vm_resv_get(vma->vm);
			vma->vm_ddestroy = true;
		} else {
			i915_vma_destroy_locked(vma);
			i915_gem_object_put(obj);
		}
	}
}

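/* Tear down every vma still known to the vm, leaving both lists empty. */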
static void __i915_vm_close(struct i915_address_space *vm)
{
	mutex_lock(&vm->mutex);

	clear_vm_list(&vm->bound_list);
	clear_vm_list(&vm->unbound_list);

	/* Check for must-fix unanticipated side-effects */
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	GEM_BUG_ON(!list_empty(&vm->unbound_list));

	mutex_unlock(&vm->mutex);
}

/*
 * Lock the vm's objects into the current ww context. All paging
 * structures share the vm's dma-resv, so locking one of them locks
 * them all.
 */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}

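/* Undo i915_address_space_init(): tear down the drm_mm range manager. */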
void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock, including when vm freeing was delayed
 * because we raced with a dying vma (see clear_vm_list()).
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	mutex_destroy(&vm->mutex);

	kfree(vm);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	__i915_vm_close(vm);

	/* Synchronize async unbinds. */
	i915_vma_resource_bind_dep_sync_all(vm);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

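/*
 * Final vm reference dropped: defer the actual teardown to a worker,
 * which can take vm->mutex and wait for outstanding async unbinds.
 */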
void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

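/*
 * One-time setup of an address space: reference counts, the reclaim-safe
 * vm->mutex, the shared dma-resv, the drm_mm range manager covering
 * [0, vm->total) and the per-region minimum alignments.
 */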
void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	vm->pending_unbind = RB_ROOT_CACHED;
	INIT_WORK(&vm->release_work, __i915_vm_release);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV and BXT VTD workarounds use stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory inside
		 * it through stop_machine().
		 *
		 * Add the annotation for this; we use trylock in the shrinker.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);

	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
		 ARRAY_SIZE(vm->min_alignment));

	if (HAS_64K_PAGES(vm->i915)) {
		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
	}

	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
	INIT_LIST_HEAD(&vm->unbound_list);
}

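/*
 * px helpers: accessors for a paging-structure object's kernel mapping,
 * DMA address and backing page. All of them require the object's pages
 * to already be present (GEM_BUG_ON otherwise).
 */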
void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

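/*
 * Fill one page of a paging structure with @count copies of the 64-bit
 * entry @val, then clflush so the update is visible past the CPU caches.
 */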
void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);
}

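/*
 * Scratch stays zeroed in production; under CONFIG_DRM_I915_DEBUG_GEM it
 * is filled with POISON_FREE instead so stray reads stand out in the
 * error state.
 */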
static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
	drm_clflush_virt_range(vaddr, scratch->base.size);
}

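/*
 * Allocate the vm's single scratch page: preferably 64K where the
 * platform wants 64K GTT pages, falling back to a 4K page if the larger
 * allocation fails or comes back insufficiently contiguous or aligned.
 */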
int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
	    !HAS_64K_PAGES(vm->i915))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_scratch_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

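/* Release the scratch page and every per-level scratch paging structure. */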
void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!vm->scratch[0])
		return;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for GTT-related workarounds. It is called on driver
	 * load and after a GPU reset, so you can place workarounds here even
	 * if they get overwritten by a GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec, if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		gt_WARN_ON_ONCE(gt, can_use_gtt_cache &&
				intel_uncore_read(uncore,
						  HSW_GTT_CACHE_EN) == 0);
	}
}

static void xelpmp_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(0),
			   MTL_PPAT_L4_0_WB);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(1),
			   MTL_PPAT_L4_1_WT);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(2),
			   MTL_PPAT_L4_3_UC);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(3),
			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(4),
			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void xelpg_setup_private_ppat(struct intel_gt *gt)
{
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0),
				     MTL_PPAT_L4_0_WB);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1),
				     MTL_PPAT_L4_1_WT);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2),
				     MTL_PPAT_L4_3_UC);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3),
				     MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4),
				     MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

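/*
 * The PAT registers are multicast (MCR) on XeHP, so take the required
 * forcewake and the MCR steering lock around the raw _fw writes.
 */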
static void xehp_setup_private_ppat(struct intel_gt *gt)
{
	enum forcewake_domains fw;
	unsigned long flags;

	fw = intel_uncore_forcewake_for_reg(gt->uncore, _MMIO(XEHP_PAT_INDEX(0).reg),
					    FW_REG_WRITE);
	intel_uncore_forcewake_get(gt->uncore, fw);

	intel_gt_mcr_lock(gt, &flags);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
	intel_gt_mcr_unlock(gt, flags);

	intel_uncore_forcewake_put(gt->uncore, fw);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

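/*
 * Program the private PAT for this gt, dispatching on platform: the MTL
 * media GT and each graphics IP generation have their own table layout.
 */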
void setup_private_pat(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	struct drm_i915_private *i915 = gt->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (gt->type == GT_MEDIA) {
		xelpmp_setup_private_ppat(gt->uncore);
		return;
	}

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		xelpg_setup_private_ppat(gt);
	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
		xehp_setup_private_ppat(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

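/*
 * Create an internal, LLC-cached object and wrap it in a vma for the
 * given vm, for use as a CPU-readable scratch buffer.
 */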
struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

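/* As above, but additionally pin the vma (globally for a GGTT vm). */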
struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif