// SPDX-License-Identifier: MIT
/*
 * Copyright © 2008-2021 Intel Corporation
 */

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"

#include "gen2_engine_cs.h"
#include "gen6_engine_cs.h"
#include "gen6_ppgtt.h"
#include "gen7_renderclear.h"
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_mitigations.h"
#include "i915_reg.h"
#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "shmem_utils.h"

/* Rough estimate of the typical request size, performing a flush,
 * set-context and then emitting the batch.
 */
#define LEGACY_REQUEST_SIZE 200

static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Keep the render interrupt unmasked as this papers over
	 * lost interrupts following a reset.
	 */
	if (engine->class == RENDER_CLASS) {
		if (GRAPHICS_VER(engine->i915) >= 6)
			mask &= ~BIT(0);
		else
			mask &= ~I915_USER_INTERRUPT;
	}

	intel_engine_set_hwsp_writemask(engine, mask);
}

static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
{
	u32 addr;

	addr = lower_32_bits(phys);
	if (GRAPHICS_VER(engine->i915) >= 4)
		addr |= (phys >> 28) & 0xf0;

	intel_uncore_write(engine->uncore, HWS_PGA, addr);
}

static struct page *status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{
	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
	set_hwstam(engine, ~0u);
}

static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
{
	i915_reg_t hwsp;

	/*
	 * The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (GRAPHICS_VER(engine->i915) == 7) {
		switch (engine->id) {
		/*
		 * No more rings exist on Gen7. Default case is only to shut up
		 * gcc switch check warning.
		 */
		default:
			GEM_BUG_ON(engine->id);
			fallthrough;
		case RCS0:
			hwsp = RENDER_HWS_PGA_GEN7;
			break;
		case BCS0:
			hwsp = BLT_HWS_PGA_GEN7;
			break;
		case VCS0:
			hwsp = BSD_HWS_PGA_GEN7;
			break;
		case VECS0:
			hwsp = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (GRAPHICS_VER(engine->i915) == 6) {
		hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
	} else {
		hwsp = RING_HWS_PGA(engine->mmio_base);
	}

	intel_uncore_write_fw(engine->uncore, hwsp, offset);
	intel_uncore_posting_read_fw(engine->uncore, hwsp);
}

static void flush_cs_tlb(struct intel_engine_cs *engine)
{
	if (!IS_GRAPHICS_VER(engine->i915, 6, 7))
		return;

	/* ring should be idle before issuing a sync flush */
	if ((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0)
		drm_warn(&engine->i915->drm, "%s not idle before sync flush!\n",
			 engine->name);

	ENGINE_WRITE_FW(engine, RING_INSTPM,
			_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					   INSTPM_SYNC_FLUSH));
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_INSTPM(engine->mmio_base),
					 INSTPM_SYNC_FLUSH, 0,
					 2000, 0, NULL))
		ENGINE_TRACE(engine,
			     "wait for SyncFlush to complete for TLB invalidation timed out\n");
}

static void ring_setup_status_page(struct intel_engine_cs *engine)
{
	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
	set_hwstam(engine, ~0u);

	flush_cs_tlb(engine);
}

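/*
 * Contexts using the global GTT borrow the page directory of the aliasing
 * ppgtt (if one exists); vm_alias() resolves which address space the
 * engine actually needs to load into its PP_DIR registers.
 */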
static struct i915_address_space *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		vm = &i915_vm_to_ggtt(vm)->alias->vm;

	return vm;
}

static u32 pp_dir(struct i915_address_space *vm)
{
	return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
}

static void set_pp_dir(struct intel_engine_cs *engine)
{
	struct i915_address_space *vm = vm_alias(engine->gt->vm);

	if (!vm)
		return;

	ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
	ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm));

	if (GRAPHICS_VER(engine->i915) >= 7) {
		ENGINE_WRITE_FW(engine,
				RING_MODE_GEN7,
				_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static bool stop_ring(struct intel_engine_cs *engine)
{
	/* Empty the ring by skipping to the end */
	ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL));
	ENGINE_POSTING_READ(engine, RING_HEAD);

	/* The ring must be empty before it is disabled */
	ENGINE_WRITE_FW(engine, RING_CTL, 0);
	ENGINE_POSTING_READ(engine, RING_CTL);

	/* Then reset the disabled ring */
	ENGINE_WRITE_FW(engine, RING_HEAD, 0);
	ENGINE_WRITE_FW(engine, RING_TAIL, 0);

	return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0;
}

static int xcs_resume(struct intel_engine_cs *engine)
{
	struct intel_ring *ring = engine->legacy.ring;
	ktime_t kt;

	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
		     ring->head, ring->tail);

	/*
	 * Double check the ring is empty & disabled before we resume. Called
	 * from atomic context during PCI probe, so _hardirq().
	 */
	intel_synchronize_hardirq(engine->i915);
	if (!stop_ring(engine))
		goto err;

	if (HWS_NEEDS_PHYSICAL(engine->i915))
		ring_setup_phys_status_page(engine);
	else
		ring_setup_status_page(engine);

	intel_breadcrumbs_reset(engine->breadcrumbs);

	/* Enforce ordering by reading HEAD register back */
	ENGINE_POSTING_READ(engine, RING_HEAD);

	/*
	 * Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values.
	 */
	ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma));

	/* Check that the ring offsets point within the ring! */
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
	intel_ring_update_space(ring);

	set_pp_dir(engine);

	/*
	 * First wake the ring up to an empty/idle ring.
	 * Use 50ms of delay to let the engine write successfully
	 * for all platforms. Experimented with different values and
	 * determined that 50ms works best based on testing.
	 */
	for ((kt) = ktime_get() + (50 * NSEC_PER_MSEC);
	     ktime_before(ktime_get(), (kt)); cpu_relax()) {
		/*
		 * If a reset fails because the engine resumes from an
		 * incorrect RING_HEAD, the GPU may then be fed invalid
		 * instructions, which may lead to an unrecoverable hang.
		 * So if the first write doesn't succeed, try again.
		 */
		ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
		if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head)
			break;
	}

	ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
	if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) {
		ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n",
			     ENGINE_READ_FW(engine, RING_HEAD),
			     ENGINE_READ_FW(engine, RING_TAIL),
			     ring->head);
		goto err;
	}

	ENGINE_WRITE_FW(engine, RING_CTL,
			RING_CTL_SIZE(ring->size) | RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_CTL(engine->mmio_base),
					 RING_VALID, RING_VALID,
					 5000, 0, NULL)) {
		ENGINE_TRACE(engine, "failed to restart\n");
		goto err;
	}

	if (GRAPHICS_VER(engine->i915) > 2) {
		ENGINE_WRITE_FW(engine,
				RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
		ENGINE_POSTING_READ(engine, RING_MI_MODE);
	}

	/* Now awake, let it get started */
	if (ring->tail != ring->head) {
		ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail);
		ENGINE_POSTING_READ(engine, RING_TAIL);
	}

	/* Papering over lost _interrupts_ immediately following the restart */
	intel_engine_signal_breadcrumbs(engine);
	return 0;

err:
	gt_err(engine->gt, "%s initialization failed\n", engine->name);
	ENGINE_TRACE(engine,
		     "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
		     ENGINE_READ(engine, RING_CTL),
		     ENGINE_READ(engine, RING_CTL) & RING_VALID,
		     ENGINE_READ(engine, RING_HEAD), ring->head,
		     ENGINE_READ(engine, RING_TAIL), ring->tail,
		     ENGINE_READ(engine, RING_START),
		     i915_ggtt_offset(ring->vma));
	GEM_TRACE_DUMP();
	return -EIO;
}

static void sanitize_hwsp(struct intel_engine_cs *engine)
{
	struct intel_timeline *tl;

	list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
		intel_timeline_reset_seqno(tl);
}

static void xcs_sanitize(struct intel_engine_cs *engine)
{
	/*
	 * Poison residual state on resume, in case the suspend didn't!
	 *
	 * We have to assume that across suspend/resume (or other loss
	 * of control) the contents of our pinned buffers have been
	 * lost, replaced by garbage. Since this doesn't always happen,
	 * let's poison such state so that we more quickly spot when
	 * we falsely assume it has been preserved.
	 */
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);

	/*
	 * The kernel_context HWSP is stored in the status_page. As above,
	 * that may be lost on resume/initialisation, and so we need to
	 * reset the value in the HWSP.
	 */
	sanitize_hwsp(engine);

	/* And scrub the dirty cachelines for the HWSP */
	drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);

	intel_engine_reset_pinned_contexts(engine);
}

static void reset_prepare(struct intel_engine_cs *engine)
{
	/*
	 * We stop the engines, otherwise we might get a failed reset and a
	 * dead gpu (on elk). Also, even a gpu as modern as kbl can suffer
	 * from a system hang if a batchbuffer is progressing when
	 * the reset is issued, regardless of the READY_TO_RESET ack.
	 * Thus assume it is best to stop the engines on all gens
	 * where we have a gpu reset.
	 *
	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
	 *
	 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
	 * WaClearRingBufHeadRegAtInit:ctg,elk
	 *
	 * FIXME: Wa for more modern gens needs to be validated
	 */
	ENGINE_TRACE(engine, "\n");
	intel_engine_stop_cs(engine);

	if (!stop_ring(engine)) {
		/* G45 ring initialization often fails to reset head to zero */
		ENGINE_TRACE(engine,
			     "HEAD not reset to zero, "
			     "{ CTL:%08x, HEAD:%08x, TAIL:%08x, START:%08x }\n",
			     ENGINE_READ_FW(engine, RING_CTL),
			     ENGINE_READ_FW(engine, RING_HEAD),
			     ENGINE_READ_FW(engine, RING_TAIL),
			     ENGINE_READ_FW(engine, RING_START));
		/*
		 * Sometimes the engine head fails to reset to zero even after
		 * writing to it. Use wait_for_atomic() with a 20ms timeout to
		 * let the engine resume from the correct RING_HEAD.
		 * Experimented with different values and determined
		 * that 20ms works best based on testing.
		 */
		if (wait_for_atomic((!stop_ring(engine) == 0), 20)) {
			drm_err(&engine->i915->drm,
				"failed to set %s head to zero "
				"ctl %08x head %08x tail %08x start %08x\n",
				engine->name,
				ENGINE_READ_FW(engine, RING_CTL),
				ENGINE_READ_FW(engine, RING_HEAD),
				ENGINE_READ_FW(engine, RING_TAIL),
				ENGINE_READ_FW(engine, RING_START));
		}
	}
}

static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
	struct i915_request *pos, *rq;
	unsigned long flags;
	u32 head;

	rq = NULL;
	spin_lock_irqsave(&engine->sched_engine->lock, flags);
	rcu_read_lock();
	list_for_each_entry(pos, &engine->sched_engine->requests, sched.link) {
		if (!__i915_request_is_complete(pos)) {
			rq = pos;
			break;
		}
	}
	rcu_read_unlock();

	/*
	 * The guilty request will get skipped on a hung engine.
	 *
	 * Users of client default contexts do not rely on logical
	 * state preserved between batches so it is safe to execute
	 * queued requests following the hang. Non default contexts
	 * rely on preserved state, so skipping a batch loses the
	 * evolution of the state and it needs to be considered corrupted.
	 * Executing more queued batches on top of corrupted state is
	 * risky. But we take the risk by trying to advance through
	 * the queued requests in order to make the client behaviour
	 * more predictable around resets, by not throwing away random
	 * amount of batches it has prepared for execution. Sophisticated
	 * clients can use gem_reset_stats_ioctl and dma fence status
	 * (exported via sync_file info ioctl on explicit fences) to observe
	 * when it loses the context state and should rebuild accordingly.
	 *
	 * The context ban, and ultimately the client ban, mechanism are safety
	 * valves if client submission ends up resulting in nothing more than
	 * subsequent hangs.
	 */

	if (rq) {
		/*
		 * Try to restore the logical GPU state to match the
		 * continuation of the request queue. If we skip the
		 * context/PD restore, then the next request may try to execute
		 * assuming that its context is valid and loaded on the GPU and
		 * so may try to access invalid memory, prompting repeated GPU
		 * hangs.
		 *
		 * If the request was guilty, we still restore the logical
		 * state in case the next request requires it (e.g. the
		 * aliasing ppgtt), but skip over the hung batch.
		 *
		 * If the request was innocent, we try to replay the request
		 * with the restored context.
		 */
		__i915_request_reset(rq, stalled);

		GEM_BUG_ON(rq->ring != engine->legacy.ring);
		head = rq->head;
	} else {
		head = engine->legacy.ring->tail;
	}
	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);

	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}

static void reset_finish(struct intel_engine_cs *engine)
{
}

static void reset_cancel(struct intel_engine_cs *engine)
{
	struct i915_request *request;
	unsigned long flags;

	spin_lock_irqsave(&engine->sched_engine->lock, flags);

	/* Mark all submitted requests as skipped. */
	list_for_each_entry(request, &engine->sched_engine->requests, sched.link)
		i915_request_put(i915_request_mark_eio(request));
	intel_engine_signal_breadcrumbs(engine);

	/* Remaining _unready_ requests will be nop'ed when submitted */

	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}

static void i9xx_submit_request(struct i915_request *request)
{
	i915_request_submit(request);
	wmb(); /* paranoid flush writes out of the WCB before mmio */

	ENGINE_WRITE(request->engine, RING_TAIL,
		     intel_ring_set_tail(request->ring, request->tail));
}

static void __ring_context_fini(struct intel_context *ce)
{
	i915_vma_put(ce->state);
}

static void ring_context_destroy(struct kref *ref)
{
	struct intel_context *ce = container_of(ref, typeof(*ce), ref);

	GEM_BUG_ON(intel_context_is_pinned(ce));

	if (ce->state)
		__ring_context_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

static int ring_context_init_default_state(struct intel_context *ce,
					    struct i915_gem_ww_ctx *ww)
{
	struct drm_i915_gem_object *obj = ce->state->obj;
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	shmem_read(ce->default_state, 0, vaddr, ce->engine->context_size);

	i915_gem_object_flush_map(obj);
	__i915_gem_object_release_map(obj);

	__set_bit(CONTEXT_VALID_BIT, &ce->flags);
	return 0;
}

static int ring_context_pre_pin(struct intel_context *ce,
				struct i915_gem_ww_ctx *ww,
				void **unused)
{
	struct i915_address_space *vm;
	int err = 0;

	if (ce->default_state &&
	    !test_bit(CONTEXT_VALID_BIT, &ce->flags)) {
		err = ring_context_init_default_state(ce, ww);
		if (err)
			return err;
	}

	vm = vm_alias(ce->vm);
	if (vm)
		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww);

	return err;
}

static void __context_unpin_ppgtt(struct intel_context *ce)
{
	struct i915_address_space *vm;

	vm = vm_alias(ce->vm);
	if (vm)
		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
}

static void ring_context_unpin(struct intel_context *ce)
{
}

static void ring_context_post_unpin(struct intel_context *ce)
{
	__context_unpin_ppgtt(ce);
}

static struct i915_vma *
alloc_context_vma(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(i915, engine->context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/*
	 * Try to make the context utilize L3 as well as LLC.
	 *
	 * On VLV we don't have L3 controls in the PTEs so we
	 * shouldn't touch the cache level, especially as that
	 * would make the object snooped which might have a
	 * negative performance impact.
	 *
	 * Snooping is required on non-llc platforms in execlist
	 * mode, but since all GGTT accesses use PAT entry 0 we
	 * get snooping anyway regardless of cache_level.
	 *
	 * This is only applicable for Ivy Bridge devices since
	 * later platforms don't have L3 control bits in the PTE.
	 */
	if (IS_IVYBRIDGE(i915))
		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	return vma;

err_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int ring_context_alloc(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;

	if (!intel_context_has_own_state(ce))
		ce->default_state = engine->default_state;

	/* One ringbuffer to rule them all */
	GEM_BUG_ON(!engine->legacy.ring);
	ce->ring = engine->legacy.ring;

	GEM_BUG_ON(ce->state);
	if (engine->context_size) {
		struct i915_vma *vma;

		vma = alloc_context_vma(engine);
		if (IS_ERR(vma))
			return PTR_ERR(vma);

		ce->state = vma;
	}

	ce->timeline = intel_timeline_get(engine->legacy.timeline);

	return 0;
}

static int ring_context_pin(struct intel_context *ce, void *unused)
{
	return 0;
}

static void ring_context_reset(struct intel_context *ce)
{
	intel_ring_reset(ce->ring, ce->ring->emit);
	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
}

static void ring_context_revoke(struct intel_context *ce,
				struct i915_request *rq,
				unsigned int preempt_timeout_ms)
{
	struct intel_engine_cs *engine;

	if (!rq || !i915_request_is_active(rq))
		return;

	engine = rq->engine;
	lockdep_assert_held(&engine->sched_engine->lock);
	list_for_each_entry_continue(rq, &engine->sched_engine->requests,
				     sched.link)
		if (rq->context == ce) {
			i915_request_set_error_once(rq, -EIO);
			__i915_request_skip(rq);
		}
}

static void ring_context_cancel_request(struct intel_context *ce,
					struct i915_request *rq)
{
	struct intel_engine_cs *engine = NULL;

	i915_request_active_engine(rq, &engine);

	if (engine && intel_engine_pulse(engine))
		intel_gt_handle_error(engine->gt, engine->mask, 0,
				      "request cancellation by %s",
				      current->comm);
}

static const struct intel_context_ops ring_context_ops = {
	.alloc = ring_context_alloc,

	.cancel_request = ring_context_cancel_request,

	.revoke = ring_context_revoke,

	.pre_pin = ring_context_pre_pin,
	.pin = ring_context_pin,
	.unpin = ring_context_unpin,
	.post_unpin = ring_context_post_unpin,

	.enter = intel_context_enter_engine,
	.exit = intel_context_exit_engine,

	.reset = ring_context_reset,
	.destroy = ring_context_destroy,
};

static int load_pd_dir(struct i915_request *rq,
		       struct i915_address_space *vm,
		       u32 valid)
{
	const struct intel_engine_cs * const engine = rq->engine;
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
	*cs++ = valid;

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = pp_dir(vm);

	/* Stall until the page table load is complete? */
	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = intel_gt_scratch_offset(engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);

	intel_ring_advance(rq, cs);

	return rq->engine->emit_flush(rq, EMIT_FLUSH);
}

static int mi_set_context(struct i915_request *rq,
			  struct intel_context *ce,
			  u32 flags)
{
	struct intel_engine_cs *engine = rq->engine;
	struct drm_i915_private *i915 = engine->i915;
	enum intel_engine_id id;
	const int num_engines =
		IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0;
	bool force_restore = false;
	int len;
	u32 *cs;

	len = 4;
	if (GRAPHICS_VER(i915) == 7)
		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
	else if (GRAPHICS_VER(i915) == 5)
		len += 2;
	if (flags & MI_FORCE_RESTORE) {
		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
		flags &= ~MI_FORCE_RESTORE;
		force_restore = true;
		len += 2;
	}

	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
	if (GRAPHICS_VER(i915) == 7) {
		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
		if (num_engines) {
			struct intel_engine_cs *signaller;

			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
			for_each_engine(signaller, engine->gt, id) {
				if (signaller == engine)
					continue;

				*cs++ = i915_mmio_reg_offset(
					   RING_PSMI_CTL(signaller->mmio_base));
				*cs++ = _MASKED_BIT_ENABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}
		}
	} else if (GRAPHICS_VER(i915) == 5) {
		/*
		 * This w/a is only listed for pre-production ilk a/b steppings,
		 * but is also mentioned for programming the powerctx. To be
		 * safe, just apply the workaround; we do not use SyncFlush so
		 * this should never take effect and so be a no-op!
		 */
		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
	}

	if (force_restore) {
		/*
		 * The HW doesn't handle being told to restore the current
		 * context very well. Quite often it likes to go off and
		 * sulk, especially when it is meant to be reloading PP_DIR.
		 * A very simple fix to force the reload is to simply switch
		 * away from the current context and back again.
		 *
		 * Note that the kernel_context will contain random state
		 * following the INHIBIT_RESTORE. We accept this since we
		 * never use the kernel_context state; it is merely a
		 * placeholder we use to flush other contexts.
		 */
		*cs++ = MI_SET_CONTEXT;
		*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
			MI_MM_SPACE_GTT |
			MI_RESTORE_INHIBIT;
	}

	*cs++ = MI_NOOP;
	*cs++ = MI_SET_CONTEXT;
	*cs++ = i915_ggtt_offset(ce->state) | flags;
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
	*cs++ = MI_NOOP;

	if (GRAPHICS_VER(i915) == 7) {
		if (num_engines) {
			struct intel_engine_cs *signaller;
			i915_reg_t last_reg = INVALID_MMIO_REG; /* keep gcc quiet */

			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
			for_each_engine(signaller, engine->gt, id) {
				if (signaller == engine)
					continue;

				last_reg = RING_PSMI_CTL(signaller->mmio_base);
				*cs++ = i915_mmio_reg_offset(last_reg);
				*cs++ = _MASKED_BIT_DISABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}

			/* Insert a delay before the next switch! */
			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
			*cs++ = i915_mmio_reg_offset(last_reg);
			*cs++ = intel_gt_scratch_offset(engine->gt,
							INTEL_GT_SCRATCH_FIELD_DEFAULT);
			*cs++ = MI_NOOP;
		}
		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	} else if (GRAPHICS_VER(i915) == 5) {
		*cs++ = MI_SUSPEND_FLUSH;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

static int remap_l3_slice(struct i915_request *rq, int slice)
{
#define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32))
	u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
	int i;

	if (!remap_info)
		return 0;

	cs = intel_ring_begin(rq, L3LOG_DW * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	*cs++ = MI_LOAD_REGISTER_IMM(L3LOG_DW);
	for (i = 0; i < L3LOG_DW; i++) {
		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
		*cs++ = remap_info[i];
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
#undef L3LOG_DW
}

static int remap_l3(struct i915_request *rq)
{
	struct i915_gem_context *ctx = i915_request_gem_context(rq);
	int i, err;

	if (!ctx || !ctx->remap_slice)
		return 0;

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(ctx->remap_slice & BIT(i)))
			continue;

		err = remap_l3_slice(rq, i);
		if (err)
			return err;
	}

	ctx->remap_slice = 0;
	return 0;
}

static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
{
	int ret;

	if (!vm)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
	if (ret)
		return ret;

	/*
	 * Not only do we need a full barrier (post-sync write) after
	 * invalidating the TLBs, but we need to wait a little bit
	 * longer. Whether this is merely delaying us, or the
	 * subsequent flush is a key part of serialising with the
	 * post-sync op, this extra pass appears vital before a
	 * mm switch!
	 */
	ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
	if (ret)
		return ret;

	return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
}

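/*
 * Before handing the engine over to a different context, run the wa_ctx
 * batch (built by gen7_setup_clear_gpr_bb()) from the kernel context to
 * scrub residual engine state as part of the clear-residuals mitigation.
 */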
static int clear_residuals(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	int ret;

	ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
	if (ret)
		return ret;

	if (engine->kernel_context->state) {
		ret = mi_set_context(rq,
				     engine->kernel_context,
				     MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
		if (ret)
			return ret;
	}

	ret = engine->emit_bb_start(rq,
				    i915_vma_offset(engine->wa_ctx.vma), 0,
				    0);
	if (ret)
		return ret;

	ret = engine->emit_flush(rq, EMIT_FLUSH);
	if (ret)
		return ret;

	/* Always invalidate before the next switch_mm() */
	return engine->emit_flush(rq, EMIT_INVALIDATE);
}

static int switch_context(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_context *ce = rq->context;
	void **residuals = NULL;
	int ret;

	GEM_BUG_ON(HAS_EXECLISTS(engine->i915));

	if (engine->wa_ctx.vma && ce != engine->kernel_context) {
		if (engine->wa_ctx.vma->private != ce &&
		    i915_mitigate_clear_residuals()) {
			ret = clear_residuals(rq);
			if (ret)
				return ret;

			residuals = &engine->wa_ctx.vma->private;
		}
	}

	ret = switch_mm(rq, vm_alias(ce->vm));
	if (ret)
		return ret;

	if (ce->state) {
		u32 flags;

		GEM_BUG_ON(engine->id != RCS0);

		/* For resource streamer on HSW+ and power context elsewhere */
		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);

		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
		if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
			flags |= MI_RESTORE_EXT_STATE_EN;
		else
			flags |= MI_RESTORE_INHIBIT;

		ret = mi_set_context(rq, ce, flags);
		if (ret)
			return ret;
	}

	ret = remap_l3(rq);
	if (ret)
		return ret;

	/*
	 * Now past the point of no return, this request _will_ be emitted.
	 *
	 * Or at least this preamble will be emitted, the request may be
	 * interrupted prior to submitting the user payload. If so, we
	 * still submit the "empty" request in order to preserve global
	 * state tracking such as this, our tracking of the current
	 * dirty context.
	 */
	if (residuals) {
		intel_context_put(*residuals);
		*residuals = intel_context_get(ce);
	}

	return 0;
}

static int ring_request_alloc(struct i915_request *request)
{
	int ret;

	GEM_BUG_ON(!intel_context_is_pinned(request->context));
	GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);

	/*
	 * Flush enough space to reduce the likelihood of waiting after
	 * we start building the request - in which case we will just
	 * have to repeat work.
	 */
	request->reserved_space += LEGACY_REQUEST_SIZE;

	/* Unconditionally invalidate GPU caches and TLBs. */
	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
	if (ret)
		return ret;

	ret = switch_context(request);
	if (ret)
		return ret;

	request->reserved_space -= LEGACY_REQUEST_SIZE;
	return 0;
}

static void gen6_bsd_submit_request(struct i915_request *request)
{
	struct intel_uncore *uncore = request->engine->uncore;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
			      _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (__intel_wait_for_register_fw(uncore,
					 RING_PSMI_CTL(GEN6_BSD_RING_BASE),
					 GEN6_BSD_SLEEP_INDICATOR,
					 0,
					 1000, 0, NULL))
		drm_err(&uncore->i915->drm,
			"timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	i9xx_submit_request(request);

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
			      _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));

	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}

static void i9xx_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = i9xx_submit_request;
}

static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = gen6_bsd_submit_request;
}

static void ring_release(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	drm_WARN_ON(&i915->drm, GRAPHICS_VER(i915) > 2 &&
		    (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);

	intel_engine_cleanup_common(engine);

	if (engine->wa_ctx.vma) {
		intel_context_put(engine->wa_ctx.vma->private);
		i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
	}

	intel_ring_unpin(engine->legacy.ring);
	intel_ring_put(engine->legacy.ring);

	intel_timeline_unpin(engine->legacy.timeline);
	intel_timeline_put(engine->legacy.timeline);
}

static void irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	intel_engine_signal_breadcrumbs(engine);
}

static void setup_irq(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	intel_engine_set_irq_handler(engine, irq_handler);

	if (GRAPHICS_VER(i915) >= 6) {
		engine->irq_enable = gen6_irq_enable;
		engine->irq_disable = gen6_irq_disable;
	} else if (GRAPHICS_VER(i915) >= 5) {
		engine->irq_enable = gen5_irq_enable;
		engine->irq_disable = gen5_irq_disable;
	} else {
		engine->irq_enable = gen2_irq_enable;
		engine->irq_disable = gen2_irq_disable;
	}
}

static void add_to_engine(struct i915_request *rq)
{
	lockdep_assert_held(&rq->engine->sched_engine->lock);
	list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
}

static void remove_from_engine(struct i915_request *rq)
{
	spin_lock_irq(&rq->engine->sched_engine->lock);
	list_del_init(&rq->sched.link);

	/* Prevent further __await_execution() registering a cb, then flush */
	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);

	spin_unlock_irq(&rq->engine->sched_engine->lock);

	i915_request_notify_execute_cb_imm(rq);
}

static void setup_common(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	/* gen8+ are only supported with execlists */
	GEM_BUG_ON(GRAPHICS_VER(i915) >= 8);

	setup_irq(engine);

	engine->resume = xcs_resume;
	engine->sanitize = xcs_sanitize;

	engine->reset.prepare = reset_prepare;
	engine->reset.rewind = reset_rewind;
	engine->reset.cancel = reset_cancel;
	engine->reset.finish = reset_finish;

	engine->add_active_request = add_to_engine;
	engine->remove_active_request = remove_from_engine;

	engine->cops = &ring_context_ops;
	engine->request_alloc = ring_request_alloc;

	/*
	 * Using a global execution timeline; the previous final breadcrumb is
	 * equivalent to our next initial breadcrumb so we can elide
	 * engine->emit_init_breadcrumb().
	 */
	engine->emit_fini_breadcrumb = gen2_emit_breadcrumb;
	if (GRAPHICS_VER(i915) == 5)
		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;

	engine->set_default_submission = i9xx_set_default_submission;

	if (GRAPHICS_VER(i915) >= 6)
		engine->emit_bb_start = gen6_emit_bb_start;
	else if (GRAPHICS_VER(i915) >= 4)
		engine->emit_bb_start = gen4_emit_bb_start;
	else if (IS_I830(i915) || IS_I845G(i915))
		engine->emit_bb_start = i830_emit_bb_start;
	else
		engine->emit_bb_start = gen2_emit_bb_start;
}

static void setup_rcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_L3_DPF(i915))
		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;

	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;

	if (GRAPHICS_VER(i915) >= 7) {
		engine->emit_flush = gen7_emit_flush_rcs;
		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs;
	} else if (GRAPHICS_VER(i915) == 6) {
		engine->emit_flush = gen6_emit_flush_rcs;
		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs;
	} else if (GRAPHICS_VER(i915) == 5) {
		engine->emit_flush = gen4_emit_flush_rcs;
	} else {
		if (GRAPHICS_VER(i915) < 4)
			engine->emit_flush = gen2_emit_flush;
		else
			engine->emit_flush = gen4_emit_flush_rcs;
		engine->irq_enable_mask = I915_USER_INTERRUPT;
	}

	if (IS_HASWELL(i915))
		engine->emit_bb_start = hsw_emit_bb_start;
}

static void setup_vcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (GRAPHICS_VER(i915) >= 6) {
		/* gen6 bsd needs a special wa for tail updates */
		if (GRAPHICS_VER(i915) == 6)
			engine->set_default_submission = gen6_bsd_set_default_submission;
		engine->emit_flush = gen6_emit_flush_vcs;
		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;

		if (GRAPHICS_VER(i915) == 6)
			engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
		else
			engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
	} else {
		engine->emit_flush = gen4_emit_flush_vcs;
		if (GRAPHICS_VER(i915) == 5)
			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
		else
			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
	}
}

static void setup_bcs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	engine->emit_flush = gen6_emit_flush_xcs;
	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;

	if (GRAPHICS_VER(i915) == 6)
		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
	else
		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
}

static void setup_vecs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 7);

	engine->emit_flush = gen6_emit_flush_xcs;
	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
	engine->irq_enable = hsw_irq_enable_vecs;
	engine->irq_disable = hsw_irq_disable_vecs;

	engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
}

static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
				    struct i915_vma * const vma)
{
	return gen7_setup_clear_gpr_bb(engine, vma);
}

static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine,
				   struct i915_gem_ww_ctx *ww,
				   struct i915_vma *vma)
{
	int err;

	err = i915_vma_pin_ww(vma, ww, 0, 0, PIN_USER | PIN_HIGH);
	if (err)
		return err;

	err = i915_vma_sync(vma);
	if (err)
		goto err_unpin;

	err = gen7_ctx_switch_bb_setup(engine, vma);
	if (err)
		goto err_unpin;

	engine->wa_ctx.vma = vma;
	return 0;

err_unpin:
	i915_vma_unpin(vma);
	return err;
}

static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int size, err;

	if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS)
		return NULL;

	err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
	if (err < 0)
		return ERR_PTR(err);
	if (!err)
		return NULL;

	size = ALIGN(err, PAGE_SIZE);

	obj = i915_gem_object_create_internal(engine->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, engine->gt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return ERR_CAST(vma);
	}

	vma->private = intel_context_create(engine); /* dummy residuals */
	if (IS_ERR(vma->private)) {
		err = PTR_ERR(vma->private);
		vma->private = NULL;
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

int intel_ring_submission_setup(struct intel_engine_cs *engine)
{
	struct i915_gem_ww_ctx ww;
	struct intel_timeline *timeline;
	struct intel_ring *ring;
	struct i915_vma *gen7_wa_vma;
	int err;

	setup_common(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		setup_rcs(engine);
		break;
	case VIDEO_DECODE_CLASS:
		setup_vcs(engine);
		break;
	case COPY_ENGINE_CLASS:
		setup_bcs(engine);
		break;
	case VIDEO_ENHANCEMENT_CLASS:
		setup_vecs(engine);
		break;
	default:
		MISSING_CASE(engine->class);
		return -ENODEV;
	}

	timeline = intel_timeline_create_from_engine(engine,
						     I915_GEM_HWS_SEQNO_ADDR);
	if (IS_ERR(timeline)) {
		err = PTR_ERR(timeline);
		goto err;
	}
	GEM_BUG_ON(timeline->has_initial_breadcrumb);

	ring = intel_engine_create_ring(engine, SZ_16K);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_timeline;
	}

	GEM_BUG_ON(engine->legacy.ring);
	engine->legacy.ring = ring;
	engine->legacy.timeline = timeline;

	gen7_wa_vma = gen7_ctx_vma(engine);
	if (IS_ERR(gen7_wa_vma)) {
		err = PTR_ERR(gen7_wa_vma);
		goto err_ring;
	}

	i915_gem_ww_ctx_init(&ww, false);

retry:
	err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
	if (!err && gen7_wa_vma)
		err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
	if (!err)
		err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
	if (!err)
		err = intel_timeline_pin(timeline, &ww);
	if (!err) {
		err = intel_ring_pin(ring, &ww);
		if (err)
			intel_timeline_unpin(timeline);
	}
	if (err)
		goto out;

	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);

	if (gen7_wa_vma) {
		err = gen7_ctx_switch_bb_init(engine, &ww, gen7_wa_vma);
		if (err) {
			intel_ring_unpin(ring);
			intel_timeline_unpin(timeline);
		}
	}

out:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	if (err)
		goto err_gen7_put;

	/* Finally, take ownership and responsibility for cleanup! */
	engine->release = ring_release;

	return 0;

err_gen7_put:
	if (gen7_wa_vma) {
		intel_context_put(gen7_wa_vma->private);
		i915_gem_object_put(gen7_wa_vma->obj);
	}
err_ring:
	intel_ring_put(ring);
err_timeline:
	intel_timeline_put(timeline);
err:
	intel_engine_cleanup_common(engine);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_ring_submission.c"
#endif