// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/sched/mm.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "i915_reg.h"
#include "i915_trace.h"
#include "i915_utils.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_gtt.h"

bool i915_ggtt_require_binder(struct drm_i915_private *i915)
{
	/* Wa_13010847436 & Wa_14019519902 */
	return !i915_direct_stolen_access(i915) &&
	       MEDIA_VER_FULL(i915) == IP_VER(13, 0);
}

static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
	return IS_BROXTON(i915) && i915_vtd_active(i915);
}

bool intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
{
	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
}

struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * used the passed in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
						    vm->lmem_pt_obj_flags);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}

struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}

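/*
 * Pin and kernel-map a paging-structure object. The map type is chosen
 * for GT coherency (with a temporary WC override on MTL, see the FIXME
 * below), and the object is made unshrinkable so that the shrinker can
 * never unmap a live page table.
 */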
int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

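/*
 * As map_pt_dma(), but for callers that already hold the object lock:
 * uses i915_gem_object_pin_map() instead of the _unlocked variant.
 */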
int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

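/*
 * Drain a vm's bound/unbound vma list. Each vma whose object is still
 * alive is destroyed immediately; vmas belonging to dying objects are
 * merely unbound here, with final destruction (and freeing of the vm
 * itself) deferred to the object destructor.
 */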
static void clear_vm_list(struct list_head *list)
{
	struct i915_vma *vma, *vn;

	list_for_each_entry_safe(vma, vn, list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		if (!i915_gem_object_get_rcu(obj)) {
			/*
			 * Object is dying, but has not yet cleared its
			 * vma list.
			 * Unbind the dying vma to ensure our list
			 * is completely drained. We leave the destruction to
			 * the object destructor to avoid the vma
			 * disappearing under it.
			 */
			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
			WARN_ON(__i915_vma_unbind(vma));

			/* Remove from the unbound list */
			list_del_init(&vma->vm_link);

			/*
			 * Delay the vm and vm mutex freeing until the
			 * object is done with destruction.
			 */
			i915_vm_resv_get(vma->vm);
			vma->vm_ddestroy = true;
		} else {
			i915_vma_destroy_locked(vma);
			i915_gem_object_put(obj);
		}
	}
}

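/* Tear down every vma still known to the vm, leaving both lists empty. */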
static void __i915_vm_close(struct i915_address_space *vm)
{
	mutex_lock(&vm->mutex);

	clear_vm_list(&vm->bound_list);
	clear_vm_list(&vm->unbound_list);

	/* Check for must-fix unanticipated side-effects */
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	GEM_BUG_ON(!list_empty(&vm->unbound_list));

	mutex_unlock(&vm->mutex);
}

/*
 * Lock the vm's objects into the current ww context. All paging
 * structures share the vm's dma-resv, so locking one of them locks
 * them all.
 */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}

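/* Undo i915_address_space_init(): tear down the drm_mm range manager. */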
void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock, including when vm freeing was delayed
 * because we raced with a dying vma (see clear_vm_list()).
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	mutex_destroy(&vm->mutex);

	kfree(vm);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	__i915_vm_close(vm);

	/* Synchronize async unbinds. */
	i915_vma_resource_bind_dep_sync_all(vm);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

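/*
 * Final vm reference dropped: defer the actual teardown to a worker,
 * which can take vm->mutex and wait for outstanding async unbinds.
 */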
void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

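/*
 * One-time setup of an address space: reference counts, the reclaim-safe
 * vm->mutex, the shared dma-resv, the drm_mm range manager covering
 * [0, vm->total) and the per-region minimum alignments.
 */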
void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	vm->pending_unbind = RB_ROOT_CACHED;
	INIT_WORK(&vm->release_work, __i915_vm_release);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV and BXT VTD workarounds use stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory inside
		 * it through stop_machine().
		 *
		 * Add the annotation for this; we use trylock in the shrinker.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);

	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
		 ARRAY_SIZE(vm->min_alignment));

	if (HAS_64K_PAGES(vm->i915)) {
		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
	}

	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
	INIT_LIST_HEAD(&vm->unbound_list);
}

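/*
 * px helpers: accessors for a paging-structure object's kernel mapping,
 * DMA address and backing page. All of them require the object's pages
 * to already be present (GEM_BUG_ON otherwise).
 */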
void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

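/*
 * Fill one page of a paging structure with @count copies of the 64-bit
 * entry @val, then clflush so the update is visible past the CPU caches.
 */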
void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);
}

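/*
 * Scratch stays zeroed in production; under CONFIG_DRM_I915_DEBUG_GEM it
 * is filled with POISON_FREE instead so stray reads stand out in the
 * error state.
 */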
static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
	drm_clflush_virt_range(vaddr, scratch->base.size);
}

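/*
 * Allocate the vm's single scratch page: preferably 64K where the
 * platform wants 64K GTT pages, falling back to a 4K page if the larger
 * allocation fails or comes back insufficiently contiguous or aligned.
 */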
int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
	    !HAS_64K_PAGES(vm->i915))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_scratch_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

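/* Release the scratch page and every per-level scratch paging structure. */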
void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!vm->scratch[0])
		return;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for GTT-related workarounds. It is called on driver
	 * load and after a GPU reset, so you can place workarounds here even
	 * if they get overwritten by a GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec, if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		gt_WARN_ON_ONCE(gt, can_use_gtt_cache &&
				intel_uncore_read(uncore,
						  HSW_GTT_CACHE_EN) == 0);
	}
}

static void xelpmp_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(0),
			   MTL_PPAT_L4_0_WB);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(1),
			   MTL_PPAT_L4_1_WT);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(2),
			   MTL_PPAT_L4_3_UC);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(3),
			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(4),
			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void xelpg_setup_private_ppat(struct intel_gt *gt)
{
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0),
				     MTL_PPAT_L4_0_WB);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1),
				     MTL_PPAT_L4_1_WT);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2),
				     MTL_PPAT_L4_3_UC);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3),
				     MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4),
				     MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

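/*
 * The PAT registers are multicast (MCR) on XeHP, so take the required
 * forcewake and the MCR steering lock around the raw _fw writes.
 */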
static void xehp_setup_private_ppat(struct intel_gt *gt)
{
	enum forcewake_domains fw;
	unsigned long flags;

	fw = intel_uncore_forcewake_for_reg(gt->uncore, _MMIO(XEHP_PAT_INDEX(0).reg),
					    FW_REG_WRITE);
	intel_uncore_forcewake_get(gt->uncore, fw);

	intel_gt_mcr_lock(gt, &flags);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
	intel_gt_mcr_unlock(gt, flags);

	intel_uncore_forcewake_put(gt->uncore, fw);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

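/*
 * Program the private PAT for this gt, dispatching on platform: the MTL
 * media GT and each graphics IP generation have their own table layout.
 */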
void setup_private_pat(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	struct drm_i915_private *i915 = gt->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (gt->type == GT_MEDIA) {
		xelpmp_setup_private_ppat(gt->uncore);
		return;
	}

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		xelpg_setup_private_ppat(gt);
	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
		xehp_setup_private_ppat(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

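/*
 * Create an internal, LLC-cached object and wrap it in a vma for the
 * given vm, for use as a CPU-readable scratch buffer.
 */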
struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

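/* As above, but additionally pin the vma (globally for a GGTT vm). */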
struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif