// SPDX-License-Identifier: MIT
/*
 * Copyright © 2008-2021 Intel Corporation
 */

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"

#include "gen2_engine_cs.h"
#include "gen6_engine_cs.h"
#include "gen6_ppgtt.h"
#include "gen7_renderclear.h"
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_mitigations.h"
#include "i915_reg.h"
#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "shmem_utils.h"

/* Rough estimate of the typical request size, performing a flush,
 * set-context and then emitting the batch.
 */
#define LEGACY_REQUEST_SIZE 200

static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Keep the render interrupt unmasked as this papers over
	 * lost interrupts following a reset.
	 */
	if (engine->class == RENDER_CLASS) {
		if (GRAPHICS_VER(engine->i915) >= 6)
			mask &= ~BIT(0);
		else
			mask &= ~I915_USER_INTERRUPT;
	}

	intel_engine_set_hwsp_writemask(engine, mask);
}

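/*
 * Tell the hardware where the status page lives. HWS_PGA is written with
 * the lower 32 bits of the page-aligned physical address; on gen4+ the
 * code below additionally folds physical address bits 35:32 into bits 7:4
 * of the register value via (phys >> 28) & 0xf0.
 */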
static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
{
	u32 addr;

	addr = lower_32_bits(phys);
	if (GRAPHICS_VER(engine->i915) >= 4)
		addr |= (phys >> 28) & 0xf0;

	intel_uncore_write(engine->uncore, HWS_PGA, addr);
}

static struct page *status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{
	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
	set_hwstam(engine, ~0u);
}

static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
{
	i915_reg_t hwsp;

	/*
	 * The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (GRAPHICS_VER(engine->i915) == 7) {
		switch (engine->id) {
		/*
		 * No more rings exist on Gen7. Default case is only to shut up
		 * gcc switch check warning.
		 */
		default:
			GEM_BUG_ON(engine->id);
			fallthrough;
		case RCS0:
			hwsp = RENDER_HWS_PGA_GEN7;
			break;
		case BCS0:
			hwsp = BLT_HWS_PGA_GEN7;
			break;
		case VCS0:
			hwsp = BSD_HWS_PGA_GEN7;
			break;
		case VECS0:
			hwsp = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (GRAPHICS_VER(engine->i915) == 6) {
		hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
	} else {
		hwsp = RING_HWS_PGA(engine->mmio_base);
	}

	intel_uncore_write_fw(engine->uncore, hwsp, offset);
	intel_uncore_posting_read_fw(engine->uncore, hwsp);
}

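/*
 * On gen6/gen7, invalidate the command streamer TLB by setting both
 * INSTPM_TLB_INVALIDATE and INSTPM_SYNC_FLUSH and then polling for the
 * sync-flush bit to clear; the ring is expected to be idle beforehand.
 */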
static void flush_cs_tlb(struct intel_engine_cs *engine)
{
	if (!IS_GRAPHICS_VER(engine->i915, 6, 7))
		return;

	/* ring should be idle before issuing a sync flush */
	if ((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0)
		drm_warn(&engine->i915->drm, "%s not idle before sync flush!\n",
			 engine->name);

	ENGINE_WRITE_FW(engine, RING_INSTPM,
			_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					   INSTPM_SYNC_FLUSH));
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_INSTPM(engine->mmio_base),
					 INSTPM_SYNC_FLUSH, 0,
					 2000, 0, NULL))
		ENGINE_TRACE(engine,
			     "wait for SyncFlush to complete for TLB invalidation timed out\n");
}

static void ring_setup_status_page(struct intel_engine_cs *engine)
{
	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
	set_hwstam(engine, ~0u);

	flush_cs_tlb(engine);
}

static struct i915_address_space *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		vm = &i915_vm_to_ggtt(vm)->alias->vm;

	return vm;
}

static u32 pp_dir(struct i915_address_space *vm)
{
	return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
}

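/*
 * Point the ring's PP_DIR registers at the aliasing ppgtt (vm_alias()
 * above substitutes it for the GGTT vm) and, on gen7, enable PPGTT mode
 * for the ring. PP_DIR_DCLV_2G is understood to mark the whole 2GiB
 * directory range as valid.
 */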
static void set_pp_dir(struct intel_engine_cs *engine)
{
	struct i915_address_space *vm = vm_alias(engine->gt->vm);

	if (!vm)
		return;

	ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
	ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm));

	if (GRAPHICS_VER(engine->i915) >= 7) {
		ENGINE_WRITE_FW(engine,
				RING_MODE_GEN7,
				_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static bool stop_ring(struct intel_engine_cs *engine)
{
	/* Empty the ring by skipping to the end */
	ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL));
	ENGINE_POSTING_READ(engine, RING_HEAD);

	/* The ring must be empty before it is disabled */
	ENGINE_WRITE_FW(engine, RING_CTL, 0);
	ENGINE_POSTING_READ(engine, RING_CTL);

	/* Then reset the disabled ring */
	ENGINE_WRITE_FW(engine, RING_HEAD, 0);
	ENGINE_WRITE_FW(engine, RING_TAIL, 0);

	return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0;
}

static int xcs_resume(struct intel_engine_cs *engine)
{
	struct intel_ring *ring = engine->legacy.ring;
	ktime_t kt;

	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
		     ring->head, ring->tail);

	/*
	 * Double check the ring is empty & disabled before we resume. Called
	 * from atomic context during PCI probe, so _hardirq().
	 */
	intel_synchronize_hardirq(engine->i915);
	if (!stop_ring(engine))
		goto err;

	if (HWS_NEEDS_PHYSICAL(engine->i915))
		ring_setup_phys_status_page(engine);
	else
		ring_setup_status_page(engine);

	intel_breadcrumbs_reset(engine->breadcrumbs);

	/* Enforce ordering by reading HEAD register back */
	ENGINE_POSTING_READ(engine, RING_HEAD);

	/*
	 * Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values.
	 */
	ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma));

	/* Check that the ring offsets point within the ring! */
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
	intel_ring_update_space(ring);

	set_pp_dir(engine);

	/*
	 * First wake the ring up to an empty/idle ring.
	 * Use up to 50ms of delay to let the engine accept the write on
	 * all platforms; testing with different values determined that
	 * 50ms works best.
	 */
	for ((kt) = ktime_get() + (50 * NSEC_PER_MSEC);
	     ktime_before(ktime_get(), (kt)); cpu_relax()) {
		/*
		 * If a reset fails because the engine resumes from an
		 * incorrect RING_HEAD, the GPU may be fed invalid
		 * instructions, which may lead to an unrecoverable hang.
		 * So if the first write doesn't stick, try again.
		 */
		ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
		if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head)
			break;
	}

	ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
	if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) {
		ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n",
			     ENGINE_READ_FW(engine, RING_HEAD),
			     ENGINE_READ_FW(engine, RING_TAIL),
			     ring->head);
		goto err;
	}

	ENGINE_WRITE_FW(engine, RING_CTL,
			RING_CTL_SIZE(ring->size) | RING_VALID);

	/* If the ring never reports itself as valid, it is dead */
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_CTL(engine->mmio_base),
					 RING_VALID, RING_VALID,
					 5000, 0, NULL)) {
		ENGINE_TRACE(engine, "failed to restart\n");
		goto err;
	}

	if (GRAPHICS_VER(engine->i915) > 2) {
		ENGINE_WRITE_FW(engine,
				RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
		ENGINE_POSTING_READ(engine, RING_MI_MODE);
	}

	/* Now awake, let it get started */
	if (ring->tail != ring->head) {
		ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail);
		ENGINE_POSTING_READ(engine, RING_TAIL);
	}

	/* Papering over lost _interrupts_ immediately following the restart */
	intel_engine_signal_breadcrumbs(engine);
	return 0;

err:
	gt_err(engine->gt, "%s initialization failed\n", engine->name);
	ENGINE_TRACE(engine,
		     "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
		     ENGINE_READ(engine, RING_CTL),
		     ENGINE_READ(engine, RING_CTL) & RING_VALID,
		     ENGINE_READ(engine, RING_HEAD), ring->head,
		     ENGINE_READ(engine, RING_TAIL), ring->tail,
		     ENGINE_READ(engine, RING_START),
		     i915_ggtt_offset(ring->vma));
	GEM_TRACE_DUMP();
	return -EIO;
}

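/*
 * Reset the breadcrumb seqno recorded in the HWSP for every timeline
 * writing through this status page; as noted in xcs_sanitize() below, the
 * page contents may have been lost across suspend/resume.
 */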
static void sanitize_hwsp(struct intel_engine_cs *engine)
{
	struct intel_timeline *tl;

	list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
		intel_timeline_reset_seqno(tl);
}

static void xcs_sanitize(struct intel_engine_cs *engine)
{
	/*
	 * Poison residual state on resume, in case the suspend didn't!
	 *
	 * We have to assume that across suspend/resume (or other loss
	 * of control) the contents of our pinned buffers have been
	 * lost, replaced by garbage. Since this doesn't always happen,
	 * let's poison such state so that we more quickly spot when
	 * we falsely assume it has been preserved.
	 */
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);

	/*
	 * The kernel_context HWSP is stored in the status_page. As above,
	 * that may be lost on resume/initialisation, and so we need to
	 * reset the value in the HWSP.
	 */
	sanitize_hwsp(engine);

	/* And scrub the dirty cachelines for the HWSP */
	drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);

	intel_engine_reset_pinned_contexts(engine);
}

static void reset_prepare(struct intel_engine_cs *engine)
{
	/*
	 * We stop engines, otherwise we might get failed reset and a
	 * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
	 * from a system hang if a batchbuffer is progressing when
	 * the reset is issued, regardless of the READY_TO_RESET ack.
	 * Thus assume it is best to stop engines on all gens
	 * where we have a gpu reset.
	 *
	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
	 *
	 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
	 * WaClearRingBufHeadRegAtInit:ctg,elk
	 *
	 * FIXME: Wa for more modern gens needs to be validated
	 */
	ENGINE_TRACE(engine, "\n");
	intel_engine_stop_cs(engine);

	if (!stop_ring(engine)) {
		/* G45 ring initialization often fails to reset head to zero */
		ENGINE_TRACE(engine,
			     "HEAD not reset to zero, "
			     "{ CTL:%08x, HEAD:%08x, TAIL:%08x, START:%08x }\n",
			     ENGINE_READ_FW(engine, RING_CTL),
			     ENGINE_READ_FW(engine, RING_HEAD),
			     ENGINE_READ_FW(engine, RING_TAIL),
			     ENGINE_READ_FW(engine, RING_START));
		/*
		 * Sometimes the engine head fails to reset to zero even after
		 * writing to it. Retry under wait_for_atomic() with a 20ms
		 * timeout so that the engine resumes from the correct
		 * RING_HEAD; testing with different values determined that
		 * 20ms works best.
		 */
		if (wait_for_atomic((!stop_ring(engine) == 0), 20)) {
			drm_err(&engine->i915->drm,
				"failed to set %s head to zero "
				"ctl %08x head %08x tail %08x start %08x\n",
				engine->name,
				ENGINE_READ_FW(engine, RING_CTL),
				ENGINE_READ_FW(engine, RING_HEAD),
				ENGINE_READ_FW(engine, RING_TAIL),
				ENGINE_READ_FW(engine, RING_START));
		}
	}
}

static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
	struct i915_request *pos, *rq;
	unsigned long flags;
	u32 head;

	rq = NULL;
	spin_lock_irqsave(&engine->sched_engine->lock, flags);
	rcu_read_lock();
	list_for_each_entry(pos, &engine->sched_engine->requests, sched.link) {
		if (!__i915_request_is_complete(pos)) {
			rq = pos;
			break;
		}
	}
	rcu_read_unlock();

	/*
	 * The guilty request will get skipped on a hung engine.
	 *
	 * Users of client default contexts do not rely on logical
	 * state preserved between batches so it is safe to execute
	 * queued requests following the hang. Non default contexts
	 * rely on preserved state, so skipping a batch loses the
	 * evolution of the state and it needs to be considered corrupted.
	 * Executing more queued batches on top of corrupted state is
	 * risky. But we take the risk by trying to advance through
	 * the queued requests in order to make the client behaviour
	 * more predictable around resets, by not throwing away a random
	 * amount of batches it has prepared for execution. Sophisticated
	 * clients can use gem_reset_stats_ioctl and dma fence status
	 * (exported via sync_file info ioctl on explicit fences) to observe
	 * when they lose the context state and should rebuild accordingly.
	 *
	 * The context ban, and ultimately the client ban, mechanism are safety
	 * valves if client submission ends up resulting in nothing more than
	 * subsequent hangs.
	 */

	if (rq) {
		/*
		 * Try to restore the logical GPU state to match the
		 * continuation of the request queue. If we skip the
		 * context/PD restore, then the next request may try to execute
		 * assuming that its context is valid and loaded on the GPU and
		 * so may try to access invalid memory, prompting repeated GPU
		 * hangs.
		 *
		 * If the request was guilty, we still restore the logical
		 * state in case the next request requires it (e.g. the
		 * aliasing ppgtt), but skip over the hung batch.
		 *
		 * If the request was innocent, we try to replay the request
		 * with the restored context.
		 */
		__i915_request_reset(rq, stalled);

		GEM_BUG_ON(rq->ring != engine->legacy.ring);
		head = rq->head;
	} else {
		head = engine->legacy.ring->tail;
	}
	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);

	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}

static void reset_finish(struct intel_engine_cs *engine)
{
}

static void reset_cancel(struct intel_engine_cs *engine)
{
	struct i915_request *request;
	unsigned long flags;

	spin_lock_irqsave(&engine->sched_engine->lock, flags);

	/* Mark all submitted requests as skipped. */
	list_for_each_entry(request, &engine->sched_engine->requests, sched.link)
		i915_request_put(i915_request_mark_eio(request));
	intel_engine_signal_breadcrumbs(engine);

	/* Remaining _unready_ requests will be nop'ed when submitted */

	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}

static void i9xx_submit_request(struct i915_request *request)
{
	i915_request_submit(request);
	wmb(); /* paranoid flush writes out of the WCB before mmio */

	ENGINE_WRITE(request->engine, RING_TAIL,
		     intel_ring_set_tail(request->ring, request->tail));
}

static void __ring_context_fini(struct intel_context *ce)
{
	i915_vma_put(ce->state);
}

static void ring_context_destroy(struct kref *ref)
{
	struct intel_context *ce = container_of(ref, typeof(*ce), ref);

	GEM_BUG_ON(intel_context_is_pinned(ce));

	if (ce->state)
		__ring_context_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

static int ring_context_init_default_state(struct intel_context *ce,
					   struct i915_gem_ww_ctx *ww)
{
	struct drm_i915_gem_object *obj = ce->state->obj;
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	shmem_read(ce->default_state, 0, vaddr, ce->engine->context_size);

	i915_gem_object_flush_map(obj);
	__i915_gem_object_release_map(obj);

	__set_bit(CONTEXT_VALID_BIT, &ce->flags);
	return 0;
}

static int ring_context_pre_pin(struct intel_context *ce,
				struct i915_gem_ww_ctx *ww,
				void **unused)
{
	struct i915_address_space *vm;
	int err = 0;

	if (ce->default_state &&
	    !test_bit(CONTEXT_VALID_BIT, &ce->flags)) {
		err = ring_context_init_default_state(ce, ww);
		if (err)
			return err;
	}

	vm = vm_alias(ce->vm);
	if (vm)
		err = gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);

	return err;
}

static void __context_unpin_ppgtt(struct intel_context *ce)
{
	struct i915_address_space *vm;

	vm = vm_alias(ce->vm);
	if (vm)
		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
}

static void ring_context_unpin(struct intel_context *ce)
{
}

static void ring_context_post_unpin(struct intel_context *ce)
{
	__context_unpin_ppgtt(ce);
}

static struct i915_vma *
alloc_context_vma(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(i915, engine->context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/*
	 * Try to make the context utilize L3 as well as LLC.
	 *
	 * On VLV we don't have L3 controls in the PTEs so we
	 * shouldn't touch the cache level, especially as that
	 * would make the object snooped which might have a
	 * negative performance impact.
	 *
	 * Snooping is required on non-llc platforms in execlist
	 * mode, but since all GGTT accesses use PAT entry 0 we
	 * get snooping anyway regardless of cache_level.
	 *
	 * This is only applicable for Ivy Bridge devices since
	 * later platforms don't have L3 control bits in the PTE.
	 */
	if (IS_IVYBRIDGE(i915))
		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	return vma;

err_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int ring_context_alloc(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;

	if (!intel_context_has_own_state(ce))
		ce->default_state = engine->default_state;

	/* One ringbuffer to rule them all */
	GEM_BUG_ON(!engine->legacy.ring);
	ce->ring = engine->legacy.ring;

	GEM_BUG_ON(ce->state);
	if (engine->context_size) {
		struct i915_vma *vma;

		vma = alloc_context_vma(engine);
		if (IS_ERR(vma))
			return PTR_ERR(vma);

		ce->state = vma;
	}

	ce->timeline = intel_timeline_get(engine->legacy.timeline);

	return 0;
}

static int ring_context_pin(struct intel_context *ce, void *unused)
{
	return 0;
}

static void ring_context_reset(struct intel_context *ce)
{
	intel_ring_reset(ce->ring, ce->ring->emit);
	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
}

static void ring_context_revoke(struct intel_context *ce,
				struct i915_request *rq,
				unsigned int preempt_timeout_ms)
{
	struct intel_engine_cs *engine;

	if (!rq || !i915_request_is_active(rq))
		return;

	engine = rq->engine;
	lockdep_assert_held(&engine->sched_engine->lock);
	list_for_each_entry_continue(rq, &engine->sched_engine->requests,
				     sched.link)
		if (rq->context == ce) {
			i915_request_set_error_once(rq, -EIO);
			__i915_request_skip(rq);
		}
}

static void ring_context_cancel_request(struct intel_context *ce,
					struct i915_request *rq)
{
	struct intel_engine_cs *engine = NULL;

	i915_request_active_engine(rq, &engine);

	if (engine && intel_engine_pulse(engine))
		intel_gt_handle_error(engine->gt, engine->mask, 0,
				      "request cancellation by %s",
				      current->comm);
}

static const struct intel_context_ops ring_context_ops = {
	.alloc = ring_context_alloc,

	.cancel_request = ring_context_cancel_request,

	.revoke = ring_context_revoke,

	.pre_pin = ring_context_pre_pin,
	.pin = ring_context_pin,
	.unpin = ring_context_unpin,
	.post_unpin = ring_context_post_unpin,

	.enter = intel_context_enter_engine,
	.exit = intel_context_exit_engine,

	.reset = ring_context_reset,
	.destroy = ring_context_destroy,
};

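/*
 * Emit the commands that point the engine at a new page directory: LRI the
 * directory cacheline-valid mask and the directory base, read the base
 * register back to scratch with an SRM (intended to stall until the load
 * has landed), then force a TLB invalidate through INSTPM.
 */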
static int load_pd_dir(struct i915_request *rq,
		       struct i915_address_space *vm,
		       u32 valid)
{
	const struct intel_engine_cs * const engine = rq->engine;
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
	*cs++ = valid;

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = pp_dir(vm);

	/* Stall until the page table load is complete? */
	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = intel_gt_scratch_offset(engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);

	intel_ring_advance(rq, cs);

	return rq->engine->emit_flush(rq, EMIT_FLUSH);
}

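/*
 * Emit an MI_SET_CONTEXT switching to @ce. The dword count below must
 * match what is emitted: 4 for the core MI_NOOP/MI_SET_CONTEXT/address/
 * MI_NOOP sequence, 2 more if we bounce through the kernel context to
 * force a restore, plus the gen7 arbitration/PSMI workarounds and the
 * gen5 suspend-flush workaround where applicable.
 */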
static int mi_set_context(struct i915_request *rq,
			  struct intel_context *ce,
			  u32 flags)
{
	struct intel_engine_cs *engine = rq->engine;
	struct drm_i915_private *i915 = engine->i915;
	enum intel_engine_id id;
	const int num_engines =
		IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0;
	bool force_restore = false;
	int len;
	u32 *cs;

	len = 4;
	if (GRAPHICS_VER(i915) == 7)
		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
	else if (GRAPHICS_VER(i915) == 5)
		len += 2;
	if (flags & MI_FORCE_RESTORE) {
		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
		flags &= ~MI_FORCE_RESTORE;
		force_restore = true;
		len += 2;
	}

	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
	if (GRAPHICS_VER(i915) == 7) {
		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
		if (num_engines) {
			struct intel_engine_cs *signaller;

			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
			for_each_engine(signaller, engine->gt, id) {
				if (signaller == engine)
					continue;

				*cs++ = i915_mmio_reg_offset(
					   RING_PSMI_CTL(signaller->mmio_base));
				*cs++ = _MASKED_BIT_ENABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}
		}
	} else if (GRAPHICS_VER(i915) == 5) {
		/*
		 * This w/a is only listed for pre-production ilk a/b steppings,
		 * but is also mentioned for programming the powerctx. To be
		 * safe, just apply the workaround; we do not use SyncFlush so
		 * this should never take effect and so be a no-op!
		 */
		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
	}

	if (force_restore) {
		/*
		 * The HW doesn't handle being told to restore the current
		 * context very well. Quite often it likes to go off and
		 * sulk, especially when it is meant to be reloading PP_DIR.
		 * A very simple fix to force the reload is to simply switch
		 * away from the current context and back again.
		 *
		 * Note that the kernel_context will contain random state
		 * following the INHIBIT_RESTORE. We accept this since we
		 * never use the kernel_context state; it is merely a
		 * placeholder we use to flush other contexts.
		 */
		*cs++ = MI_SET_CONTEXT;
		*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
			MI_MM_SPACE_GTT |
			MI_RESTORE_INHIBIT;
	}

	*cs++ = MI_NOOP;
	*cs++ = MI_SET_CONTEXT;
	*cs++ = i915_ggtt_offset(ce->state) | flags;
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
	*cs++ = MI_NOOP;

	if (GRAPHICS_VER(i915) == 7) {
		if (num_engines) {
			struct intel_engine_cs *signaller;
			i915_reg_t last_reg = INVALID_MMIO_REG; /* keep gcc quiet */

			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
			for_each_engine(signaller, engine->gt, id) {
				if (signaller == engine)
					continue;

				last_reg = RING_PSMI_CTL(signaller->mmio_base);
				*cs++ = i915_mmio_reg_offset(last_reg);
				*cs++ = _MASKED_BIT_DISABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}

			/* Insert a delay before the next switch! */
			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
			*cs++ = i915_mmio_reg_offset(last_reg);
			*cs++ = intel_gt_scratch_offset(engine->gt,
							INTEL_GT_SCRATCH_FIELD_DEFAULT);
			*cs++ = MI_NOOP;
		}
		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	} else if (GRAPHICS_VER(i915) == 5) {
		*cs++ = MI_SUSPEND_FLUSH;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

static int remap_l3_slice(struct i915_request *rq, int slice)
{
#define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32))
	u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
	int i;

	if (!remap_info)
		return 0;

	cs = intel_ring_begin(rq, L3LOG_DW * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	*cs++ = MI_LOAD_REGISTER_IMM(L3LOG_DW);
	for (i = 0; i < L3LOG_DW; i++) {
		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
		*cs++ = remap_info[i];
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
#undef L3LOG_DW
}

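/*
 * Re-emit the L3 remapping registers for every slice the GEM context has
 * flagged as dirty, then clear the mask so the registers are only
 * re-emitted after the next update.
 */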
static int remap_l3(struct i915_request *rq)
{
	struct i915_gem_context *ctx = i915_request_gem_context(rq);
	int i, err;

	if (!ctx || !ctx->remap_slice)
		return 0;

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(ctx->remap_slice & BIT(i)))
			continue;

		err = remap_l3_slice(rq, i);
		if (err)
			return err;
	}

	ctx->remap_slice = 0;
	return 0;
}

static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
{
	int ret;

	if (!vm)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
	if (ret)
		return ret;

	/*
	 * Not only do we need a full barrier (post-sync write) after
	 * invalidating the TLBs, but we need to wait a little bit
	 * longer. Whether this is merely delaying us, or the
	 * subsequent flush is a key part of serialising with the
	 * post-sync op, this extra pass appears vital before a
	 * mm switch!
	 */
	ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
	if (ret)
		return ret;

	return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
}

static int clear_residuals(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	int ret;

	ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
	if (ret)
		return ret;

	if (engine->kernel_context->state) {
		ret = mi_set_context(rq,
				     engine->kernel_context,
				     MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
		if (ret)
			return ret;
	}

	ret = engine->emit_bb_start(rq,
				    i915_vma_offset(engine->wa_ctx.vma), 0,
				    0);
	if (ret)
		return ret;

	ret = engine->emit_flush(rq, EMIT_FLUSH);
	if (ret)
		return ret;

	/* Always invalidate before the next switch_mm() */
	return engine->emit_flush(rq, EMIT_INVALIDATE);
}

static int switch_context(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_context *ce = rq->context;
	void **residuals = NULL;
	int ret;

	GEM_BUG_ON(HAS_EXECLISTS(engine->i915));

	if (engine->wa_ctx.vma && ce != engine->kernel_context) {
		if (engine->wa_ctx.vma->private != ce &&
		    i915_mitigate_clear_residuals()) {
			ret = clear_residuals(rq);
			if (ret)
				return ret;

			residuals = &engine->wa_ctx.vma->private;
		}
	}

	ret = switch_mm(rq, vm_alias(ce->vm));
	if (ret)
		return ret;

	if (ce->state) {
		u32 flags;

		GEM_BUG_ON(engine->id != RCS0);

		/* For resource streamer on HSW+ and power context elsewhere */
		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);

		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
		if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
			flags |= MI_RESTORE_EXT_STATE_EN;
		else
			flags |= MI_RESTORE_INHIBIT;

		ret = mi_set_context(rq, ce, flags);
		if (ret)
			return ret;
	}

	ret = remap_l3(rq);
	if (ret)
		return ret;

	/*
	 * Now past the point of no return, this request _will_ be emitted.
	 *
	 * Or at least this preamble will be emitted, the request may be
	 * interrupted prior to submitting the user payload. If so, we
	 * still submit the "empty" request in order to preserve global
	 * state tracking such as this, our tracking of the current
	 * dirty context.
	 */
	if (residuals) {
		intel_context_put(*residuals);
		*residuals = intel_context_get(ce);
	}

	return 0;
}

static int ring_request_alloc(struct i915_request *request)
{
	int ret;

	GEM_BUG_ON(!intel_context_is_pinned(request->context));
	GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);

	/*
	 * Flush enough space to reduce the likelihood of waiting after
	 * we start building the request - in which case we will just
	 * have to repeat work.
	 */
	request->reserved_space += LEGACY_REQUEST_SIZE;

	/* Unconditionally invalidate GPU caches and TLBs. */
	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
	if (ret)
		return ret;

	ret = switch_context(request);
	if (ret)
		return ret;

	request->reserved_space -= LEGACY_REQUEST_SIZE;
	return 0;
}

static void gen6_bsd_submit_request(struct i915_request *request)
{
	struct intel_uncore *uncore = request->engine->uncore;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
			      _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (__intel_wait_for_register_fw(uncore,
					 RING_PSMI_CTL(GEN6_BSD_RING_BASE),
					 GEN6_BSD_SLEEP_INDICATOR,
					 0,
					 1000, 0, NULL))
		drm_err(&uncore->i915->drm,
			"timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	i9xx_submit_request(request);

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
			      _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));

	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}

static void i9xx_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = i9xx_submit_request;
}

static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = gen6_bsd_submit_request;
}

static void ring_release(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	drm_WARN_ON(&i915->drm, GRAPHICS_VER(i915) > 2 &&
		    (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);

	intel_engine_cleanup_common(engine);

	if (engine->wa_ctx.vma) {
		intel_context_put(engine->wa_ctx.vma->private);
		i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
	}

	intel_ring_unpin(engine->legacy.ring);
	intel_ring_put(engine->legacy.ring);

	intel_timeline_unpin(engine->legacy.timeline);
	intel_timeline_put(engine->legacy.timeline);
}

static void irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	intel_engine_signal_breadcrumbs(engine);
}

static void setup_irq(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	intel_engine_set_irq_handler(engine, irq_handler);

	if (GRAPHICS_VER(i915) >= 6) {
		engine->irq_enable = gen6_irq_enable;
		engine->irq_disable = gen6_irq_disable;
	} else if (GRAPHICS_VER(i915) >= 5) {
		engine->irq_enable = gen5_irq_enable;
		engine->irq_disable = gen5_irq_disable;
	} else {
		engine->irq_enable = gen2_irq_enable;
		engine->irq_disable = gen2_irq_disable;
	}
}

static void add_to_engine(struct i915_request *rq)
{
	lockdep_assert_held(&rq->engine->sched_engine->lock);
	list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
}

static void remove_from_engine(struct i915_request *rq)
{
	spin_lock_irq(&rq->engine->sched_engine->lock);
	list_del_init(&rq->sched.link);

	/* Prevent further __await_execution() registering a cb, then flush */
	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);

	spin_unlock_irq(&rq->engine->sched_engine->lock);

	i915_request_notify_execute_cb_imm(rq);
}

static void setup_common(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	/* gen8+ are only supported with execlists */
	GEM_BUG_ON(GRAPHICS_VER(i915) >= 8);

	setup_irq(engine);

	engine->resume = xcs_resume;
	engine->sanitize = xcs_sanitize;

	engine->reset.prepare = reset_prepare;
	engine->reset.rewind = reset_rewind;
	engine->reset.cancel = reset_cancel;
	engine->reset.finish = reset_finish;

	engine->add_active_request = add_to_engine;
	engine->remove_active_request = remove_from_engine;

	engine->cops = &ring_context_ops;
	engine->request_alloc = ring_request_alloc;

	/*
	 * Using a global execution timeline; the previous final breadcrumb is
	 * equivalent to our next initial breadcrumb so we can elide
	 * engine->emit_init_breadcrumb().
	 */
	engine->emit_fini_breadcrumb = gen2_emit_breadcrumb;
	if (GRAPHICS_VER(i915) == 5)
		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;

	engine->set_default_submission = i9xx_set_default_submission;

	if (GRAPHICS_VER(i915) >= 6)
		engine->emit_bb_start = gen6_emit_bb_start;
	else if (GRAPHICS_VER(i915) >= 4)
		engine->emit_bb_start = gen4_emit_bb_start;
	else if (IS_I830(i915) || IS_I845G(i915))
		engine->emit_bb_start = i830_emit_bb_start;
	else
		engine->emit_bb_start = gen2_emit_bb_start;
}

static void setup_rcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_L3_DPF(i915))
		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;

	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;

	if (GRAPHICS_VER(i915) >= 7) {
		engine->emit_flush = gen7_emit_flush_rcs;
		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs;
	} else if (GRAPHICS_VER(i915) == 6) {
		engine->emit_flush = gen6_emit_flush_rcs;
		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs;
	} else if (GRAPHICS_VER(i915) == 5) {
		engine->emit_flush = gen4_emit_flush_rcs;
	} else {
		if (GRAPHICS_VER(i915) < 4)
			engine->emit_flush = gen2_emit_flush;
		else
			engine->emit_flush = gen4_emit_flush_rcs;
		engine->irq_enable_mask = I915_USER_INTERRUPT;
	}

	if (IS_HASWELL(i915))
		engine->emit_bb_start = hsw_emit_bb_start;
}

static void setup_vcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (GRAPHICS_VER(i915) >= 6) {
		/* gen6 bsd needs a special wa for tail updates */
		if (GRAPHICS_VER(i915) == 6)
			engine->set_default_submission = gen6_bsd_set_default_submission;
		engine->emit_flush = gen6_emit_flush_vcs;
		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;

		if (GRAPHICS_VER(i915) == 6)
			engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
		else
			engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
	} else {
		engine->emit_flush = gen4_emit_flush_vcs;
		if (GRAPHICS_VER(i915) == 5)
			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
		else
			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
	}
}

static void setup_bcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	engine->emit_flush = gen6_emit_flush_xcs;
	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;

	if (GRAPHICS_VER(i915) == 6)
		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
	else
		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
}

static void setup_vecs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 7);

	engine->emit_flush = gen6_emit_flush_xcs;
	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
	engine->irq_enable = hsw_irq_enable_vecs;
	engine->irq_disable = hsw_irq_disable_vecs;

	engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
}

static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
				    struct i915_vma * const vma)
{
	return gen7_setup_clear_gpr_bb(engine, vma);
}

static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine,
				   struct i915_gem_ww_ctx *ww,
				   struct i915_vma *vma)
{
	int err;

	err = i915_vma_pin_ww(vma, ww, 0, 0, PIN_USER | PIN_HIGH);
	if (err)
		return err;

	err = i915_vma_sync(vma);
	if (err)
		goto err_unpin;

	err = gen7_ctx_switch_bb_setup(engine, vma);
	if (err)
		goto err_unpin;

	engine->wa_ctx.vma = vma;
	return 0;

err_unpin:
	i915_vma_unpin(vma);
	return err;
}

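/*
 * Allocate the vma backing the gen7 context-switch workaround batch.
 * Calling gen7_ctx_switch_bb_setup() with a NULL vma only probes for the
 * batch size in bytes (0 means no workaround is required); vma->private
 * carries a dummy context used to track whose residuals the batch last
 * cleared.
 */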
static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int size, err;

	if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS)
		return NULL;

	err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
	if (err < 0)
		return ERR_PTR(err);
	if (!err)
		return NULL;

	size = ALIGN(err, PAGE_SIZE);

	obj = i915_gem_object_create_internal(engine->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, engine->gt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return ERR_CAST(vma);
	}

	vma->private = intel_context_create(engine); /* dummy residuals */
	if (IS_ERR(vma->private)) {
		err = PTR_ERR(vma->private);
		vma->private = NULL;
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

int intel_ring_submission_setup(struct intel_engine_cs *engine)
{
	struct i915_gem_ww_ctx ww;
	struct intel_timeline *timeline;
	struct intel_ring *ring;
	struct i915_vma *gen7_wa_vma;
	int err;

	setup_common(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		setup_rcs(engine);
		break;
	case VIDEO_DECODE_CLASS:
		setup_vcs(engine);
		break;
	case COPY_ENGINE_CLASS:
		setup_bcs(engine);
		break;
	case VIDEO_ENHANCEMENT_CLASS:
		setup_vecs(engine);
		break;
	default:
		MISSING_CASE(engine->class);
		return -ENODEV;
	}

	timeline = intel_timeline_create_from_engine(engine,
						     I915_GEM_HWS_SEQNO_ADDR);
	if (IS_ERR(timeline)) {
		err = PTR_ERR(timeline);
		goto err;
	}
	GEM_BUG_ON(timeline->has_initial_breadcrumb);

	ring = intel_engine_create_ring(engine, SZ_16K);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_timeline;
	}

	GEM_BUG_ON(engine->legacy.ring);
	engine->legacy.ring = ring;
	engine->legacy.timeline = timeline;

	gen7_wa_vma = gen7_ctx_vma(engine);
	if (IS_ERR(gen7_wa_vma)) {
		err = PTR_ERR(gen7_wa_vma);
		goto err_ring;
	}

	i915_gem_ww_ctx_init(&ww, false);

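	/*
	 * Lock the HWSP, the optional gen7 w/a batch and the ring object
	 * under a single ww acquire context, then pin the timeline and
	 * ring; on -EDEADLK we back off and retry the whole sequence.
	 */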
retry:
	err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
	if (!err && gen7_wa_vma)
		err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
	if (!err)
		err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
	if (!err)
		err = intel_timeline_pin(timeline, &ww);
	if (!err) {
		err = intel_ring_pin(ring, &ww);
		if (err)
			intel_timeline_unpin(timeline);
	}
	if (err)
		goto out;

	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);

	if (gen7_wa_vma) {
		err = gen7_ctx_switch_bb_init(engine, &ww, gen7_wa_vma);
		if (err) {
			intel_ring_unpin(ring);
			intel_timeline_unpin(timeline);
		}
	}

out:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	if (err)
		goto err_gen7_put;

	/* Finally, take ownership and responsibility for cleanup! */
	engine->release = ring_release;

	return 0;

err_gen7_put:
	if (gen7_wa_vma) {
		intel_context_put(gen7_wa_vma->private);
		i915_gem_object_put(gen7_wa_vma->obj);
	}
err_ring:
	intel_ring_put(ring);
err_timeline:
	intel_timeline_put(timeline);
err:
	intel_engine_cleanup_common(engine);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_ring_submission.c"
#endif