// SPDX-License-Identifier: MIT
/*
 * Copyright © 2008-2021 Intel Corporation
 */

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"

#include "gen2_engine_cs.h"
#include "gen6_engine_cs.h"
#include "gen6_ppgtt.h"
#include "gen7_renderclear.h"
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_mitigations.h"
#include "i915_reg.h"
#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "shmem_utils.h"

/* Rough estimate of the typical request size, performing a flush,
 * set-context and then emitting the batch.
 */
#define LEGACY_REQUEST_SIZE 200

static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Keep the render interrupt unmasked as this papers over
	 * lost interrupts following a reset.
	 */
	if (engine->class == RENDER_CLASS) {
		if (GRAPHICS_VER(engine->i915) >= 6)
			mask &= ~BIT(0);
		else
			mask &= ~I915_USER_INTERRUPT;
	}

	intel_engine_set_hwsp_writemask(engine, mask);
}

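/*
 * Tell the hardware where the status page lives. HWS_PGA is written with
 * the lower 32 bits of the page-aligned physical address; on gen4+ the
 * code below additionally folds physical address bits 35:32 into bits 7:4
 * of the register value via (phys >> 28) & 0xf0.
 */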
static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
{
	u32 addr;

	addr = lower_32_bits(phys);
	if (GRAPHICS_VER(engine->i915) >= 4)
		addr |= (phys >> 28) & 0xf0;

	intel_uncore_write(engine->uncore, HWS_PGA, addr);
}

static struct page *status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{
	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
	set_hwstam(engine, ~0u);
}

static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
{
	i915_reg_t hwsp;

	/*
	 * The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (GRAPHICS_VER(engine->i915) == 7) {
		switch (engine->id) {
		/*
		 * No more rings exist on Gen7. Default case is only to shut up
		 * gcc switch check warning.
		 */
		default:
			GEM_BUG_ON(engine->id);
			fallthrough;
		case RCS0:
			hwsp = RENDER_HWS_PGA_GEN7;
			break;
		case BCS0:
			hwsp = BLT_HWS_PGA_GEN7;
			break;
		case VCS0:
			hwsp = BSD_HWS_PGA_GEN7;
			break;
		case VECS0:
			hwsp = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (GRAPHICS_VER(engine->i915) == 6) {
		hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
	} else {
		hwsp = RING_HWS_PGA(engine->mmio_base);
	}

	intel_uncore_write_fw(engine->uncore, hwsp, offset);
	intel_uncore_posting_read_fw(engine->uncore, hwsp);
}

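/*
 * On gen6/gen7, invalidate the command streamer TLB by setting both
 * INSTPM_TLB_INVALIDATE and INSTPM_SYNC_FLUSH and then polling for the
 * sync-flush bit to clear; the ring is expected to be idle beforehand.
 */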
static void flush_cs_tlb(struct intel_engine_cs *engine)
{
	if (!IS_GRAPHICS_VER(engine->i915, 6, 7))
		return;

	/* ring should be idle before issuing a sync flush */
	if ((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0)
		drm_warn(&engine->i915->drm, "%s not idle before sync flush!\n",
			 engine->name);

	ENGINE_WRITE_FW(engine, RING_INSTPM,
			_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					   INSTPM_SYNC_FLUSH));
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_INSTPM(engine->mmio_base),
					 INSTPM_SYNC_FLUSH, 0,
					 2000, 0, NULL))
		ENGINE_TRACE(engine,
			     "wait for SyncFlush to complete for TLB invalidation timed out\n");
}

static void ring_setup_status_page(struct intel_engine_cs *engine)
{
	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
	set_hwstam(engine, ~0u);

	flush_cs_tlb(engine);
}

static struct i915_address_space *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		vm = &i915_vm_to_ggtt(vm)->alias->vm;

	return vm;
}

static u32 pp_dir(struct i915_address_space *vm)
{
	return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
}

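/*
 * Point the ring's PP_DIR registers at the aliasing ppgtt (vm_alias()
 * above substitutes it for the GGTT vm) and, on gen7, enable PPGTT mode
 * for the ring. PP_DIR_DCLV_2G is understood to mark the whole 2GiB
 * directory range as valid.
 */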
static void set_pp_dir(struct intel_engine_cs *engine)
{
	struct i915_address_space *vm = vm_alias(engine->gt->vm);

	if (!vm)
		return;

	ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
	ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm));

	if (GRAPHICS_VER(engine->i915) >= 7) {
		ENGINE_WRITE_FW(engine,
				RING_MODE_GEN7,
				_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static bool stop_ring(struct intel_engine_cs *engine)
{
	/* Empty the ring by skipping to the end */
	ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL));
	ENGINE_POSTING_READ(engine, RING_HEAD);

	/* The ring must be empty before it is disabled */
	ENGINE_WRITE_FW(engine, RING_CTL, 0);
	ENGINE_POSTING_READ(engine, RING_CTL);

	/* Then reset the disabled ring */
	ENGINE_WRITE_FW(engine, RING_HEAD, 0);
	ENGINE_WRITE_FW(engine, RING_TAIL, 0);

	return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0;
}

static int xcs_resume(struct intel_engine_cs *engine)
{
	struct intel_ring *ring = engine->legacy.ring;
	ktime_t kt;

	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
		     ring->head, ring->tail);

	/*
	 * Double check the ring is empty & disabled before we resume. Called
	 * from atomic context during PCI probe, so _hardirq().
	 */
	intel_synchronize_hardirq(engine->i915);
	if (!stop_ring(engine))
		goto err;

	if (HWS_NEEDS_PHYSICAL(engine->i915))
		ring_setup_phys_status_page(engine);
	else
		ring_setup_status_page(engine);

	intel_breadcrumbs_reset(engine->breadcrumbs);

	/* Enforce ordering by reading HEAD register back */
	ENGINE_POSTING_READ(engine, RING_HEAD);

	/*
	 * Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values.
	 */
	ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma));

	/* Check that the ring offsets point within the ring! */
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
	intel_ring_update_space(ring);

	set_pp_dir(engine);

	/*
	 * First wake the ring up to an empty/idle ring.
	 * Use up to 50ms of delay to let the engine accept the write on
	 * all platforms; testing with different values determined that
	 * 50ms works best.
	 */
	for ((kt) = ktime_get() + (50 * NSEC_PER_MSEC);
	     ktime_before(ktime_get(), (kt)); cpu_relax()) {
		/*
		 * If a reset fails because the engine resumes from an
		 * incorrect RING_HEAD, the GPU may be fed invalid
		 * instructions, which may lead to an unrecoverable hang.
		 * So if the first write doesn't stick, try again.
		 */
		ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
		if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head)
			break;
	}

	ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
	if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) {
		ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n",
			     ENGINE_READ_FW(engine, RING_HEAD),
			     ENGINE_READ_FW(engine, RING_TAIL),
			     ring->head);
		goto err;
	}

	ENGINE_WRITE_FW(engine, RING_CTL,
			RING_CTL_SIZE(ring->size) | RING_VALID);

	/* If the ring never reports itself as valid, it is dead */
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_CTL(engine->mmio_base),
					 RING_VALID, RING_VALID,
					 5000, 0, NULL)) {
		ENGINE_TRACE(engine, "failed to restart\n");
		goto err;
	}

	if (GRAPHICS_VER(engine->i915) > 2) {
		ENGINE_WRITE_FW(engine,
				RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
		ENGINE_POSTING_READ(engine, RING_MI_MODE);
	}

	/* Now awake, let it get started */
	if (ring->tail != ring->head) {
		ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail);
		ENGINE_POSTING_READ(engine, RING_TAIL);
	}

	/* Papering over lost _interrupts_ immediately following the restart */
	intel_engine_signal_breadcrumbs(engine);
	return 0;

err:
	gt_err(engine->gt, "%s initialization failed\n", engine->name);
	ENGINE_TRACE(engine,
		     "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
		     ENGINE_READ(engine, RING_CTL),
		     ENGINE_READ(engine, RING_CTL) & RING_VALID,
		     ENGINE_READ(engine, RING_HEAD), ring->head,
		     ENGINE_READ(engine, RING_TAIL), ring->tail,
		     ENGINE_READ(engine, RING_START),
		     i915_ggtt_offset(ring->vma));
	GEM_TRACE_DUMP();
	return -EIO;
}

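/*
 * Reset the breadcrumb seqno recorded in the HWSP for every timeline
 * writing through this status page; as noted in xcs_sanitize() below, the
 * page contents may have been lost across suspend/resume.
 */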
static void sanitize_hwsp(struct intel_engine_cs *engine)
{
	struct intel_timeline *tl;

	list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
		intel_timeline_reset_seqno(tl);
}

static void xcs_sanitize(struct intel_engine_cs *engine)
{
	/*
	 * Poison residual state on resume, in case the suspend didn't!
	 *
	 * We have to assume that across suspend/resume (or other loss
	 * of control) the contents of our pinned buffers have been
	 * lost, replaced by garbage. Since this doesn't always happen,
	 * let's poison such state so that we more quickly spot when
	 * we falsely assume it has been preserved.
	 */
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);

	/*
	 * The kernel_context HWSP is stored in the status_page. As above,
	 * that may be lost on resume/initialisation, and so we need to
	 * reset the value in the HWSP.
	 */
	sanitize_hwsp(engine);

	/* And scrub the dirty cachelines for the HWSP */
	drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);

	intel_engine_reset_pinned_contexts(engine);
}

static void reset_prepare(struct intel_engine_cs *engine)
{
	/*
	 * We stop engines, otherwise we might get failed reset and a
	 * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
	 * from a system hang if a batchbuffer is progressing when
	 * the reset is issued, regardless of the READY_TO_RESET ack.
	 * Thus assume it is best to stop engines on all gens
	 * where we have a gpu reset.
	 *
	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
	 *
	 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
	 * WaClearRingBufHeadRegAtInit:ctg,elk
	 *
	 * FIXME: Wa for more modern gens needs to be validated
	 */
	ENGINE_TRACE(engine, "\n");
	intel_engine_stop_cs(engine);

	if (!stop_ring(engine)) {
		/* G45 ring initialization often fails to reset head to zero */
		ENGINE_TRACE(engine,
			     "HEAD not reset to zero, "
			     "{ CTL:%08x, HEAD:%08x, TAIL:%08x, START:%08x }\n",
			     ENGINE_READ_FW(engine, RING_CTL),
			     ENGINE_READ_FW(engine, RING_HEAD),
			     ENGINE_READ_FW(engine, RING_TAIL),
			     ENGINE_READ_FW(engine, RING_START));
		/*
		 * Sometimes the engine head fails to reset to zero even after
		 * writing to it. Retry under wait_for_atomic() with a 20ms
		 * timeout so that the engine resumes from the correct
		 * RING_HEAD; testing with different values determined that
		 * 20ms works best.
		 */
		if (wait_for_atomic((!stop_ring(engine) == 0), 20)) {
			drm_err(&engine->i915->drm,
				"failed to set %s head to zero "
				"ctl %08x head %08x tail %08x start %08x\n",
				engine->name,
				ENGINE_READ_FW(engine, RING_CTL),
				ENGINE_READ_FW(engine, RING_HEAD),
				ENGINE_READ_FW(engine, RING_TAIL),
				ENGINE_READ_FW(engine, RING_START));
		}
	}
}

static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
	struct i915_request *pos, *rq;
	unsigned long flags;
	u32 head;

	rq = NULL;
	spin_lock_irqsave(&engine->sched_engine->lock, flags);
	rcu_read_lock();
	list_for_each_entry(pos, &engine->sched_engine->requests, sched.link) {
		if (!__i915_request_is_complete(pos)) {
			rq = pos;
			break;
		}
	}
	rcu_read_unlock();

	/*
	 * The guilty request will get skipped on a hung engine.
	 *
	 * Users of client default contexts do not rely on logical
	 * state preserved between batches so it is safe to execute
	 * queued requests following the hang. Non default contexts
	 * rely on preserved state, so skipping a batch loses the
	 * evolution of the state and it needs to be considered corrupted.
	 * Executing more queued batches on top of corrupted state is
	 * risky. But we take the risk by trying to advance through
	 * the queued requests in order to make the client behaviour
	 * more predictable around resets, by not throwing away a random
	 * amount of batches it has prepared for execution. Sophisticated
	 * clients can use gem_reset_stats_ioctl and dma fence status
	 * (exported via sync_file info ioctl on explicit fences) to observe
	 * when they lose the context state and should rebuild accordingly.
	 *
	 * The context ban, and ultimately the client ban, mechanism are safety
	 * valves if client submission ends up resulting in nothing more than
	 * subsequent hangs.
	 */

	if (rq) {
		/*
		 * Try to restore the logical GPU state to match the
		 * continuation of the request queue. If we skip the
		 * context/PD restore, then the next request may try to execute
		 * assuming that its context is valid and loaded on the GPU and
		 * so may try to access invalid memory, prompting repeated GPU
		 * hangs.
		 *
		 * If the request was guilty, we still restore the logical
		 * state in case the next request requires it (e.g. the
		 * aliasing ppgtt), but skip over the hung batch.
		 *
		 * If the request was innocent, we try to replay the request
		 * with the restored context.
		 */
		__i915_request_reset(rq, stalled);

		GEM_BUG_ON(rq->ring != engine->legacy.ring);
		head = rq->head;
	} else {
		head = engine->legacy.ring->tail;
	}
	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);

	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}

static void reset_finish(struct intel_engine_cs *engine)
{
}

static void reset_cancel(struct intel_engine_cs *engine)
{
	struct i915_request *request;
	unsigned long flags;

	spin_lock_irqsave(&engine->sched_engine->lock, flags);

	/* Mark all submitted requests as skipped. */
	list_for_each_entry(request, &engine->sched_engine->requests, sched.link)
		i915_request_put(i915_request_mark_eio(request));
	intel_engine_signal_breadcrumbs(engine);

	/* Remaining _unready_ requests will be nop'ed when submitted */

	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}

static void i9xx_submit_request(struct i915_request *request)
{
	i915_request_submit(request);
	wmb(); /* paranoid flush writes out of the WCB before mmio */

	ENGINE_WRITE(request->engine, RING_TAIL,
		     intel_ring_set_tail(request->ring, request->tail));
}

static void __ring_context_fini(struct intel_context *ce)
{
	i915_vma_put(ce->state);
}

static void ring_context_destroy(struct kref *ref)
{
	struct intel_context *ce = container_of(ref, typeof(*ce), ref);

	GEM_BUG_ON(intel_context_is_pinned(ce));

	if (ce->state)
		__ring_context_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

static int ring_context_init_default_state(struct intel_context *ce,
					   struct i915_gem_ww_ctx *ww)
{
	struct drm_i915_gem_object *obj = ce->state->obj;
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	shmem_read(ce->default_state, 0, vaddr, ce->engine->context_size);

	i915_gem_object_flush_map(obj);
	__i915_gem_object_release_map(obj);

	__set_bit(CONTEXT_VALID_BIT, &ce->flags);
	return 0;
}

static int ring_context_pre_pin(struct intel_context *ce,
				struct i915_gem_ww_ctx *ww,
				void **unused)
{
	struct i915_address_space *vm;
	int err = 0;

	if (ce->default_state &&
	    !test_bit(CONTEXT_VALID_BIT, &ce->flags)) {
		err = ring_context_init_default_state(ce, ww);
		if (err)
			return err;
	}

	vm = vm_alias(ce->vm);
	if (vm)
		err = gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);

	return err;
}

static void __context_unpin_ppgtt(struct intel_context *ce)
{
	struct i915_address_space *vm;

	vm = vm_alias(ce->vm);
	if (vm)
		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
}

static void ring_context_unpin(struct intel_context *ce)
{
}

static void ring_context_post_unpin(struct intel_context *ce)
{
	__context_unpin_ppgtt(ce);
}

static struct i915_vma *
alloc_context_vma(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(i915, engine->context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/*
	 * Try to make the context utilize L3 as well as LLC.
	 *
	 * On VLV we don't have L3 controls in the PTEs so we
	 * shouldn't touch the cache level, especially as that
	 * would make the object snooped which might have a
	 * negative performance impact.
	 *
	 * Snooping is required on non-llc platforms in execlist
	 * mode, but since all GGTT accesses use PAT entry 0 we
	 * get snooping anyway regardless of cache_level.
	 *
	 * This is only applicable for Ivy Bridge devices since
	 * later platforms don't have L3 control bits in the PTE.
	 */
	if (IS_IVYBRIDGE(i915))
		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	return vma;

err_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int ring_context_alloc(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;

	if (!intel_context_has_own_state(ce))
		ce->default_state = engine->default_state;

	/* One ringbuffer to rule them all */
	GEM_BUG_ON(!engine->legacy.ring);
	ce->ring = engine->legacy.ring;

	GEM_BUG_ON(ce->state);
	if (engine->context_size) {
		struct i915_vma *vma;

		vma = alloc_context_vma(engine);
		if (IS_ERR(vma))
			return PTR_ERR(vma);

		ce->state = vma;
	}

	ce->timeline = intel_timeline_get(engine->legacy.timeline);

	return 0;
}

static int ring_context_pin(struct intel_context *ce, void *unused)
{
	return 0;
}

static void ring_context_reset(struct intel_context *ce)
{
	intel_ring_reset(ce->ring, ce->ring->emit);
	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
}

static void ring_context_revoke(struct intel_context *ce,
				struct i915_request *rq,
				unsigned int preempt_timeout_ms)
{
	struct intel_engine_cs *engine;

	if (!rq || !i915_request_is_active(rq))
		return;

	engine = rq->engine;
	lockdep_assert_held(&engine->sched_engine->lock);
	list_for_each_entry_continue(rq, &engine->sched_engine->requests,
				     sched.link)
		if (rq->context == ce) {
			i915_request_set_error_once(rq, -EIO);
			__i915_request_skip(rq);
		}
}

static void ring_context_cancel_request(struct intel_context *ce,
					struct i915_request *rq)
{
	struct intel_engine_cs *engine = NULL;

	i915_request_active_engine(rq, &engine);

	if (engine && intel_engine_pulse(engine))
		intel_gt_handle_error(engine->gt, engine->mask, 0,
				      "request cancellation by %s",
				      current->comm);
}

static const struct intel_context_ops ring_context_ops = {
	.alloc = ring_context_alloc,

	.cancel_request = ring_context_cancel_request,

	.revoke = ring_context_revoke,

	.pre_pin = ring_context_pre_pin,
	.pin = ring_context_pin,
	.unpin = ring_context_unpin,
	.post_unpin = ring_context_post_unpin,

	.enter = intel_context_enter_engine,
	.exit = intel_context_exit_engine,

	.reset = ring_context_reset,
	.destroy = ring_context_destroy,
};

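/*
 * Emit the commands that point the engine at a new page directory: LRI the
 * directory cacheline-valid mask and the directory base, read the base
 * register back to scratch with an SRM (intended to stall until the load
 * has landed), then force a TLB invalidate through INSTPM.
 */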
static int load_pd_dir(struct i915_request *rq,
		       struct i915_address_space *vm,
		       u32 valid)
{
	const struct intel_engine_cs * const engine = rq->engine;
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
	*cs++ = valid;

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = pp_dir(vm);

	/* Stall until the page table load is complete? */
	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = intel_gt_scratch_offset(engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);

	intel_ring_advance(rq, cs);

	return rq->engine->emit_flush(rq, EMIT_FLUSH);
}

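/*
 * Emit an MI_SET_CONTEXT switching to @ce. The dword count below must
 * match what is emitted: 4 for the core MI_NOOP/MI_SET_CONTEXT/address/
 * MI_NOOP sequence, 2 more if we bounce through the kernel context to
 * force a restore, plus the gen7 arbitration/PSMI workarounds and the
 * gen5 suspend-flush workaround where applicable.
 */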
static int mi_set_context(struct i915_request *rq,
			  struct intel_context *ce,
			  u32 flags)
{
	struct intel_engine_cs *engine = rq->engine;
	struct drm_i915_private *i915 = engine->i915;
	enum intel_engine_id id;
	const int num_engines =
		IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0;
	bool force_restore = false;
	int len;
	u32 *cs;

	len = 4;
	if (GRAPHICS_VER(i915) == 7)
		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
	else if (GRAPHICS_VER(i915) == 5)
		len += 2;
	if (flags & MI_FORCE_RESTORE) {
		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
		flags &= ~MI_FORCE_RESTORE;
		force_restore = true;
		len += 2;
	}

	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
	if (GRAPHICS_VER(i915) == 7) {
		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
		if (num_engines) {
			struct intel_engine_cs *signaller;

			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
			for_each_engine(signaller, engine->gt, id) {
				if (signaller == engine)
					continue;

				*cs++ = i915_mmio_reg_offset(
					   RING_PSMI_CTL(signaller->mmio_base));
				*cs++ = _MASKED_BIT_ENABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}
		}
	} else if (GRAPHICS_VER(i915) == 5) {
		/*
		 * This w/a is only listed for pre-production ilk a/b steppings,
		 * but is also mentioned for programming the powerctx. To be
		 * safe, just apply the workaround; we do not use SyncFlush so
		 * this should never take effect and so be a no-op!
		 */
		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
	}

	if (force_restore) {
		/*
		 * The HW doesn't handle being told to restore the current
		 * context very well. Quite often it likes to go off and
		 * sulk, especially when it is meant to be reloading PP_DIR.
		 * A very simple fix to force the reload is to simply switch
		 * away from the current context and back again.
		 *
		 * Note that the kernel_context will contain random state
		 * following the INHIBIT_RESTORE. We accept this since we
		 * never use the kernel_context state; it is merely a
		 * placeholder we use to flush other contexts.
		 */
		*cs++ = MI_SET_CONTEXT;
		*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
			MI_MM_SPACE_GTT |
			MI_RESTORE_INHIBIT;
	}

	*cs++ = MI_NOOP;
	*cs++ = MI_SET_CONTEXT;
	*cs++ = i915_ggtt_offset(ce->state) | flags;
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
	*cs++ = MI_NOOP;

	if (GRAPHICS_VER(i915) == 7) {
		if (num_engines) {
			struct intel_engine_cs *signaller;
			i915_reg_t last_reg = INVALID_MMIO_REG; /* keep gcc quiet */

			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
			for_each_engine(signaller, engine->gt, id) {
				if (signaller == engine)
					continue;

				last_reg = RING_PSMI_CTL(signaller->mmio_base);
				*cs++ = i915_mmio_reg_offset(last_reg);
				*cs++ = _MASKED_BIT_DISABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}

			/* Insert a delay before the next switch! */
			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
			*cs++ = i915_mmio_reg_offset(last_reg);
			*cs++ = intel_gt_scratch_offset(engine->gt,
							INTEL_GT_SCRATCH_FIELD_DEFAULT);
			*cs++ = MI_NOOP;
		}
		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	} else if (GRAPHICS_VER(i915) == 5) {
		*cs++ = MI_SUSPEND_FLUSH;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

static int remap_l3_slice(struct i915_request *rq, int slice)
{
#define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32))
	u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
	int i;

	if (!remap_info)
		return 0;

	cs = intel_ring_begin(rq, L3LOG_DW * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	*cs++ = MI_LOAD_REGISTER_IMM(L3LOG_DW);
	for (i = 0; i < L3LOG_DW; i++) {
		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
		*cs++ = remap_info[i];
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
#undef L3LOG_DW
}

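/*
 * Re-emit the L3 remapping registers for every slice the GEM context has
 * flagged as dirty, then clear the mask so the registers are only
 * re-emitted after the next update.
 */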
static int remap_l3(struct i915_request *rq)
{
	struct i915_gem_context *ctx = i915_request_gem_context(rq);
	int i, err;

	if (!ctx || !ctx->remap_slice)
		return 0;

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(ctx->remap_slice & BIT(i)))
			continue;

		err = remap_l3_slice(rq, i);
		if (err)
			return err;
	}

	ctx->remap_slice = 0;
	return 0;
}

static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
{
	int ret;

	if (!vm)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
	if (ret)
		return ret;

	/*
	 * Not only do we need a full barrier (post-sync write) after
	 * invalidating the TLBs, but we need to wait a little bit
	 * longer. Whether this is merely delaying us, or the
	 * subsequent flush is a key part of serialising with the
	 * post-sync op, this extra pass appears vital before a
	 * mm switch!
	 */
	ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
	if (ret)
		return ret;

	return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
}

static int clear_residuals(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	int ret;

	ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
	if (ret)
		return ret;

	if (engine->kernel_context->state) {
		ret = mi_set_context(rq,
				     engine->kernel_context,
				     MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
		if (ret)
			return ret;
	}

	ret = engine->emit_bb_start(rq,
				    i915_vma_offset(engine->wa_ctx.vma), 0,
				    0);
	if (ret)
		return ret;

	ret = engine->emit_flush(rq, EMIT_FLUSH);
	if (ret)
		return ret;

	/* Always invalidate before the next switch_mm() */
	return engine->emit_flush(rq, EMIT_INVALIDATE);
}

static int switch_context(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_context *ce = rq->context;
	void **residuals = NULL;
	int ret;

	GEM_BUG_ON(HAS_EXECLISTS(engine->i915));

	if (engine->wa_ctx.vma && ce != engine->kernel_context) {
		if (engine->wa_ctx.vma->private != ce &&
		    i915_mitigate_clear_residuals()) {
			ret = clear_residuals(rq);
			if (ret)
				return ret;

			residuals = &engine->wa_ctx.vma->private;
		}
	}

	ret = switch_mm(rq, vm_alias(ce->vm));
	if (ret)
		return ret;

	if (ce->state) {
		u32 flags;

		GEM_BUG_ON(engine->id != RCS0);

		/* For resource streamer on HSW+ and power context elsewhere */
		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);

		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
		if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
			flags |= MI_RESTORE_EXT_STATE_EN;
		else
			flags |= MI_RESTORE_INHIBIT;

		ret = mi_set_context(rq, ce, flags);
		if (ret)
			return ret;
	}

	ret = remap_l3(rq);
	if (ret)
		return ret;

	/*
	 * Now past the point of no return, this request _will_ be emitted.
	 *
	 * Or at least this preamble will be emitted, the request may be
	 * interrupted prior to submitting the user payload. If so, we
	 * still submit the "empty" request in order to preserve global
	 * state tracking such as this, our tracking of the current
	 * dirty context.
	 */
	if (residuals) {
		intel_context_put(*residuals);
		*residuals = intel_context_get(ce);
	}

	return 0;
}

static int ring_request_alloc(struct i915_request *request)
{
	int ret;

	GEM_BUG_ON(!intel_context_is_pinned(request->context));
	GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);

	/*
	 * Flush enough space to reduce the likelihood of waiting after
	 * we start building the request - in which case we will just
	 * have to repeat work.
	 */
	request->reserved_space += LEGACY_REQUEST_SIZE;

	/* Unconditionally invalidate GPU caches and TLBs. */
	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
	if (ret)
		return ret;

	ret = switch_context(request);
	if (ret)
		return ret;

	request->reserved_space -= LEGACY_REQUEST_SIZE;
	return 0;
}

static void gen6_bsd_submit_request(struct i915_request *request)
{
	struct intel_uncore *uncore = request->engine->uncore;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
			      _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (__intel_wait_for_register_fw(uncore,
					 RING_PSMI_CTL(GEN6_BSD_RING_BASE),
					 GEN6_BSD_SLEEP_INDICATOR,
					 0,
					 1000, 0, NULL))
		drm_err(&uncore->i915->drm,
			"timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	i9xx_submit_request(request);

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
			      _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));

	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}

static void i9xx_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = i9xx_submit_request;
}

static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = gen6_bsd_submit_request;
}

static void ring_release(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	drm_WARN_ON(&i915->drm, GRAPHICS_VER(i915) > 2 &&
		    (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);

	intel_engine_cleanup_common(engine);

	if (engine->wa_ctx.vma) {
		intel_context_put(engine->wa_ctx.vma->private);
		i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
	}

	intel_ring_unpin(engine->legacy.ring);
	intel_ring_put(engine->legacy.ring);

	intel_timeline_unpin(engine->legacy.timeline);
	intel_timeline_put(engine->legacy.timeline);
}

static void irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	intel_engine_signal_breadcrumbs(engine);
}

static void setup_irq(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	intel_engine_set_irq_handler(engine, irq_handler);

	if (GRAPHICS_VER(i915) >= 6) {
		engine->irq_enable = gen6_irq_enable;
		engine->irq_disable = gen6_irq_disable;
	} else if (GRAPHICS_VER(i915) >= 5) {
		engine->irq_enable = gen5_irq_enable;
		engine->irq_disable = gen5_irq_disable;
	} else {
		engine->irq_enable = gen2_irq_enable;
		engine->irq_disable = gen2_irq_disable;
	}
}

static void add_to_engine(struct i915_request *rq)
{
	lockdep_assert_held(&rq->engine->sched_engine->lock);
	list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
}

static void remove_from_engine(struct i915_request *rq)
{
	spin_lock_irq(&rq->engine->sched_engine->lock);
	list_del_init(&rq->sched.link);

	/* Prevent further __await_execution() registering a cb, then flush */
	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);

	spin_unlock_irq(&rq->engine->sched_engine->lock);

	i915_request_notify_execute_cb_imm(rq);
}

static void setup_common(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	/* gen8+ are only supported with execlists */
	GEM_BUG_ON(GRAPHICS_VER(i915) >= 8);

	setup_irq(engine);

	engine->resume = xcs_resume;
	engine->sanitize = xcs_sanitize;

	engine->reset.prepare = reset_prepare;
	engine->reset.rewind = reset_rewind;
	engine->reset.cancel = reset_cancel;
	engine->reset.finish = reset_finish;

	engine->add_active_request = add_to_engine;
	engine->remove_active_request = remove_from_engine;

	engine->cops = &ring_context_ops;
	engine->request_alloc = ring_request_alloc;

	/*
	 * Using a global execution timeline; the previous final breadcrumb is
	 * equivalent to our next initial breadcrumb so we can elide
	 * engine->emit_init_breadcrumb().
	 */
	engine->emit_fini_breadcrumb = gen2_emit_breadcrumb;
	if (GRAPHICS_VER(i915) == 5)
		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;

	engine->set_default_submission = i9xx_set_default_submission;

	if (GRAPHICS_VER(i915) >= 6)
		engine->emit_bb_start = gen6_emit_bb_start;
	else if (GRAPHICS_VER(i915) >= 4)
		engine->emit_bb_start = gen4_emit_bb_start;
	else if (IS_I830(i915) || IS_I845G(i915))
		engine->emit_bb_start = i830_emit_bb_start;
	else
		engine->emit_bb_start = gen2_emit_bb_start;
}

static void setup_rcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_L3_DPF(i915))
		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;

	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;

	if (GRAPHICS_VER(i915) >= 7) {
		engine->emit_flush = gen7_emit_flush_rcs;
		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs;
	} else if (GRAPHICS_VER(i915) == 6) {
		engine->emit_flush = gen6_emit_flush_rcs;
		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs;
	} else if (GRAPHICS_VER(i915) == 5) {
		engine->emit_flush = gen4_emit_flush_rcs;
	} else {
		if (GRAPHICS_VER(i915) < 4)
			engine->emit_flush = gen2_emit_flush;
		else
			engine->emit_flush = gen4_emit_flush_rcs;
		engine->irq_enable_mask = I915_USER_INTERRUPT;
	}

	if (IS_HASWELL(i915))
		engine->emit_bb_start = hsw_emit_bb_start;
}

static void setup_vcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (GRAPHICS_VER(i915) >= 6) {
		/* gen6 bsd needs a special wa for tail updates */
		if (GRAPHICS_VER(i915) == 6)
			engine->set_default_submission = gen6_bsd_set_default_submission;
		engine->emit_flush = gen6_emit_flush_vcs;
		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;

		if (GRAPHICS_VER(i915) == 6)
			engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
		else
			engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
	} else {
		engine->emit_flush = gen4_emit_flush_vcs;
		if (GRAPHICS_VER(i915) == 5)
			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
		else
			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
	}
}

static void setup_bcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	engine->emit_flush = gen6_emit_flush_xcs;
	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;

	if (GRAPHICS_VER(i915) == 6)
		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
	else
		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
}

static void setup_vecs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 7);

	engine->emit_flush = gen6_emit_flush_xcs;
	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
	engine->irq_enable = hsw_irq_enable_vecs;
	engine->irq_disable = hsw_irq_disable_vecs;

	engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
}

static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
				    struct i915_vma * const vma)
{
	return gen7_setup_clear_gpr_bb(engine, vma);
}

static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine,
				   struct i915_gem_ww_ctx *ww,
				   struct i915_vma *vma)
{
	int err;

	err = i915_vma_pin_ww(vma, ww, 0, 0, PIN_USER | PIN_HIGH);
	if (err)
		return err;

	err = i915_vma_sync(vma);
	if (err)
		goto err_unpin;

	err = gen7_ctx_switch_bb_setup(engine, vma);
	if (err)
		goto err_unpin;

	engine->wa_ctx.vma = vma;
	return 0;

err_unpin:
	i915_vma_unpin(vma);
	return err;
}

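/*
 * Allocate the vma backing the gen7 context-switch workaround batch.
 * Calling gen7_ctx_switch_bb_setup() with a NULL vma only probes for the
 * batch size in bytes (0 means no workaround is required); vma->private
 * carries a dummy context used to track whose residuals the batch last
 * cleared.
 */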
static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int size, err;

	if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS)
		return NULL;

	err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
	if (err < 0)
		return ERR_PTR(err);
	if (!err)
		return NULL;

	size = ALIGN(err, PAGE_SIZE);

	obj = i915_gem_object_create_internal(engine->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, engine->gt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return ERR_CAST(vma);
	}

	vma->private = intel_context_create(engine); /* dummy residuals */
	if (IS_ERR(vma->private)) {
		err = PTR_ERR(vma->private);
		vma->private = NULL;
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

int intel_ring_submission_setup(struct intel_engine_cs *engine)
{
	struct i915_gem_ww_ctx ww;
	struct intel_timeline *timeline;
	struct intel_ring *ring;
	struct i915_vma *gen7_wa_vma;
	int err;

	setup_common(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		setup_rcs(engine);
		break;
	case VIDEO_DECODE_CLASS:
		setup_vcs(engine);
		break;
	case COPY_ENGINE_CLASS:
		setup_bcs(engine);
		break;
	case VIDEO_ENHANCEMENT_CLASS:
		setup_vecs(engine);
		break;
	default:
		MISSING_CASE(engine->class);
		return -ENODEV;
	}

	timeline = intel_timeline_create_from_engine(engine,
						     I915_GEM_HWS_SEQNO_ADDR);
	if (IS_ERR(timeline)) {
		err = PTR_ERR(timeline);
		goto err;
	}
	GEM_BUG_ON(timeline->has_initial_breadcrumb);

	ring = intel_engine_create_ring(engine, SZ_16K);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_timeline;
	}

	GEM_BUG_ON(engine->legacy.ring);
	engine->legacy.ring = ring;
	engine->legacy.timeline = timeline;

	gen7_wa_vma = gen7_ctx_vma(engine);
	if (IS_ERR(gen7_wa_vma)) {
		err = PTR_ERR(gen7_wa_vma);
		goto err_ring;
	}

	i915_gem_ww_ctx_init(&ww, false);

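	/*
	 * Lock the HWSP, the optional gen7 w/a batch and the ring object
	 * under a single ww acquire context, then pin the timeline and
	 * ring; on -EDEADLK we back off and retry the whole sequence.
	 */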
retry:
	err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
	if (!err && gen7_wa_vma)
		err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
	if (!err)
		err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
	if (!err)
		err = intel_timeline_pin(timeline, &ww);
	if (!err) {
		err = intel_ring_pin(ring, &ww);
		if (err)
			intel_timeline_unpin(timeline);
	}
	if (err)
		goto out;

	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);

	if (gen7_wa_vma) {
		err = gen7_ctx_switch_bb_init(engine, &ww, gen7_wa_vma);
		if (err) {
			intel_ring_unpin(ring);
			intel_timeline_unpin(timeline);
		}
	}

out:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	if (err)
		goto err_gen7_put;

	/* Finally, take ownership and responsibility for cleanup! */
	engine->release = ring_release;

	return 0;

err_gen7_put:
	if (gen7_wa_vma) {
		intel_context_put(gen7_wa_vma->private);
		i915_gem_object_put(gen7_wa_vma->obj);
	}
err_ring:
	intel_ring_put(ring);
err_timeline:
	intel_timeline_put(timeline);
err:
	intel_engine_cleanup_common(engine);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_ring_submission.c"
#endif