// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gen8_engine_cs.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_ring.h"

int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	bool vf_flush_wa = false, dc_flush_wa = false;
	u32 *cs, flags = 0;
	int len;

	flags |= PIPE_CONTROL_CS_STALL;

	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}

	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		/*
		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
		 * pipe control.
		 */
		if (GRAPHICS_VER(rq->i915) == 9)
			vf_flush_wa = true;

		/* WaForGAMHang:kbl */
		if (IS_KABYLAKE(rq->i915) && IS_GRAPHICS_STEP(rq->i915, 0, STEP_C0))
			dc_flush_wa = true;
	}

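	/*
	 * gen8_emit_pipe_control() emits a fixed 6-dword packet: the base
	 * flush below, plus one extra NULL PIPE_CONTROL for vf_flush_wa and
	 * two more (a DC flush before, a CS stall after) for dc_flush_wa.
	 */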
	len = 6;

	if (vf_flush_wa)
		len += 6;

	if (dc_flush_wa)
		len += 12;

	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (vf_flush_wa)
		cs = gen8_emit_pipe_control(cs, 0, 0);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, 0);

	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);

	intel_ring_advance(rq, cs);

	return 0;
}

int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

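	/*
	 * The +1 in the length field extends MI_FLUSH_DW to carry the
	 * 64-bit post-sync address and value emitted below (4 dwords total).
	 */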
	cmd = MI_FLUSH_DW + 1;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;
	}

	*cs++ = cmd;
	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */
	intel_ring_advance(rq, cs);

	return 0;
}

int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	if (mode & EMIT_FLUSH) {
		u32 *cs;
		u32 flags = 0;

		flags |= PIPE_CONTROL_CS_STALL;

		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	if (mode & EMIT_INVALIDATE) {
		u32 *cs;
		u32 flags = 0;

		flags |= PIPE_CONTROL_CS_STALL;

		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	return 0;
}

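/*
 * On gen12+, MI_ARB_CHECK also carries the pre-parser disable control:
 * bit 8 is the write-enable mask for bit 0, which holds the desired
 * pre-fetch disable state.
 */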
static u32 preparser_disable(bool state)
{
	return MI_ARB_CHECK | 1 << 8 | state;
}

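/*
 * Only a subset of engine instances has its own AUX invalidation
 * register (e.g. VCS0/VCS2 but not the odd VCS instances); for the
 * rest we return INVALID_MMIO_REG, which gen12_needs_ccs_aux_inv()
 * below treats as "no invalidation required".
 */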
static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine)
{
	switch (engine->id) {
	case RCS0:
		return GEN12_CCS_AUX_INV;
	case BCS0:
		return GEN12_BCS0_AUX_INV;
	case VCS0:
		return GEN12_VD0_AUX_INV;
	case VCS2:
		return GEN12_VD2_AUX_INV;
	case VECS0:
		return GEN12_VE0_AUX_INV;
	case CCS0:
		return GEN12_CCS0_AUX_INV;
	default:
		return INVALID_MMIO_REG;
	}
}

static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
{
	i915_reg_t reg = gen12_get_aux_inv_reg(engine);

	/*
	 * So far, the platforms supported by i915 that have flat CCS do not
	 * require AUX invalidation. Also check whether the engine has an
	 * AUX invalidation register at all.
	 */
	return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915);
}

u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
{
	i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine);
	u32 gsi_offset = engine->gt->uncore->gsi_offset;

	if (!gen12_needs_ccs_aux_inv(engine))
		return cs;

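	/*
	 * Trigger the invalidation by writing AUX_INV into the engine's
	 * invalidation register, then use a register-poll semaphore wait
	 * until the hardware clears the bit again to signal completion.
	 */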
	*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
	*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
	*cs++ = AUX_INV;

	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
		MI_SEMAPHORE_REGISTER_POLL |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
	*cs++ = 0;
	*cs++ = 0;

	return cs;
}

static int mtl_dummy_pipe_control(struct i915_request *rq)
{
	/* Wa_14016712196 */
	if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
	    IS_DG2(rq->i915)) {
		u32 *cs;

		/* dummy PIPE_CONTROL + depth flush */
		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);
		cs = gen12_emit_pipe_control(cs,
					     0,
					     PIPE_CONTROL_DEPTH_CACHE_FLUSH,
					     LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	return 0;
}

int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	struct intel_engine_cs *engine = rq->engine;

	/*
	 * On Aux CCS platforms the invalidation of the Aux
	 * table requires quiescing memory traffic beforehand
	 */
	if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
		u32 bit_group_0 = 0;
		u32 bit_group_1 = 0;
		int err;
		u32 *cs;

		err = mtl_dummy_pipe_control(rq);
		if (err)
			return err;

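		/*
		 * Gen12 PIPE_CONTROL takes two groups of flags: bit_group_0
		 * goes into the extended bits of the command dword itself
		 * (e.g. HDC pipeline flush, CCS flush), while bit_group_1
		 * fills the classic flags dword.
		 */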
		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;

		/*
		 * From MTL onwards we also need to set the CCS_FLUSH bit in
		 * the pipe control when required.
		 */
		if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
			bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;

		/*
		 * L3 fabric flush is needed for AUX CCS invalidation
		 * which happens as part of pipe-control so we can
		 * ignore PIPE_CONTROL_FLUSH_L3. Also PIPE_CONTROL_FLUSH_L3
		 * deals with Protected Memory which is not needed for
		 * AUX CCS invalidation and leads to unwanted side effects.
		 */
		if ((mode & EMIT_FLUSH) &&
		    GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70))
			bit_group_1 |= PIPE_CONTROL_FLUSH_L3;

		bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/* Wa_1409600907:tgl,adl-p */
		bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
		bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE;

		bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX;
		bit_group_1 |= PIPE_CONTROL_QW_WRITE;

		bit_group_1 |= PIPE_CONTROL_CS_STALL;

		if (!HAS_3D_PIPELINE(engine->i915))
			bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
		else if (engine->class == COMPUTE_CLASS)
			bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1,
					     LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	if (mode & EMIT_INVALIDATE) {
		u32 flags = 0;
		u32 *cs, count;
		int err;

		err = mtl_dummy_pipe_control(rq);
		if (err)
			return err;

		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
		flags |= PIPE_CONTROL_QW_WRITE;

		flags |= PIPE_CONTROL_CS_STALL;

		if (!HAS_3D_PIPELINE(engine->i915))
			flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
		else if (engine->class == COMPUTE_CLASS)
			flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

		count = 8;
		if (gen12_needs_ccs_aux_inv(rq->engine))
			count += 8;

		cs = intel_ring_begin(rq, count);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Prevent the pre-parser from skipping past the TLB
		 * invalidate and loading a stale page for the batch
		 * buffer / request payload.
		 */
		*cs++ = preparser_disable(true);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

		cs = gen12_emit_aux_table_inv(engine, cs);

		*cs++ = preparser_disable(false);
		intel_ring_advance(rq, cs);
	}

	return 0;
}

int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
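	/* cmd first holds the ring dword count, then is reused below for MI_FLUSH_DW */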
	u32 cmd = 4;
	u32 *cs;

	if (mode & EMIT_INVALIDATE) {
		cmd += 2;

		if (gen12_needs_ccs_aux_inv(rq->engine))
			cmd += 8;
	}

	cs = intel_ring_begin(rq, cmd);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (mode & EMIT_INVALIDATE)
		*cs++ = preparser_disable(true);

	cmd = MI_FLUSH_DW + 1;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;

		if (gen12_needs_ccs_aux_inv(rq->engine) &&
		    rq->engine->class == COPY_ENGINE_CLASS)
			cmd |= MI_FLUSH_DW_CCS;
	}

	*cs++ = cmd;
	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */

	cs = gen12_emit_aux_table_inv(rq->engine, cs);

	if (mode & EMIT_INVALIDATE)
		*cs++ = preparser_disable(false);

	intel_ring_advance(rq, cs);

	return 0;
}

static u32 preempt_address(struct intel_engine_cs *engine)
{
	return (i915_ggtt_offset(engine->status_page.vma) +
		I915_GEM_HWS_PREEMPT_ADDR);
}

static u32 hwsp_offset(const struct i915_request *rq)
{
	const struct intel_timeline *tl;

	/* Before the request is executed, the timeline is fixed */
	tl = rcu_dereference_protected(rq->timeline,
				       !i915_request_signaled(rq));

	/* See the comment in i915_request_active_seqno(). */
	return page_mask_bits(tl->hwsp_offset) + offset_in_page(rq->hwsp_seqno);
}

int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
	u32 *cs;

	GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
		return 0;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

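	/*
	 * Writing seqno-1 into the timeline's HWSP marks this request as
	 * started (i915_request_started() now reports true) without
	 * signalling its own breadcrumb.
	 */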
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = hwsp_offset(rq);
	*cs++ = 0;
	*cs++ = rq->fence.seqno - 1;

	/*
	 * Check if we have been preempted before we even get started.
	 *
	 * After this point i915_request_started() reports true, even if
	 * we get preempted and so are no longer running.
	 *
	 * i915_request_started() is used during preemption processing
	 * to decide if the request is currently inside the user payload
	 * or spinning on a kernel semaphore (or earlier). For no-preemption
	 * requests, we do allow preemption on the semaphore before the user
	 * payload, but do not allow preemption once the request is started.
	 *
	 * i915_request_started() is similarly used during GPU hangs to
	 * determine if the user's payload was guilty, and if so, the
	 * request is banned. Before the request is started, it is assumed
	 * to be unharmed and an innocent victim of another's hang.
	 */
	*cs++ = MI_NOOP;
	*cs++ = MI_ARB_CHECK;

	intel_ring_advance(rq, cs);

	/* Record the updated position of the request's payload */
	rq->infix = intel_ring_offset(rq, cs);

	__set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);

	return 0;
}

static int __xehp_emit_bb_start(struct i915_request *rq,
				u64 offset, u32 len,
				const unsigned int flags,
				u32 arb)
{
	struct intel_context *ce = rq->context;
	u32 wa_offset = lrc_indirect_bb(ce);
	u32 *cs;

	GEM_BUG_ON(!ce->wa_bb_page);

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | arb;

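	/*
	 * Reload RING_PREDICATE_RESULT from the per-context scratch so the
	 * predication state is known-good before jumping into the user
	 * batch; see the stray MI_SET_PREDICATE fixup below.
	 */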
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0));
	*cs++ = wa_offset + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;

	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	/* Fixup stray MI_SET_PREDICATE as it prevents us from executing the ring */
	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = wa_offset + DG2_PREDICATE_RESULT_BB;
	*cs++ = 0;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	intel_ring_advance(rq, cs);

	return 0;
}

int xehp_emit_bb_start_noarb(struct i915_request *rq,
			     u64 offset, u32 len,
			     const unsigned int flags)
{
	return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
}

int xehp_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       const unsigned int flags)
{
	return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
}

int gen8_emit_bb_start_noarb(struct i915_request *rq,
			     u64 offset, u32 len,
			     const unsigned int flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * WaDisableCtxRestoreArbitration:bdw,chv
	 *
	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
	 * particular all the gens that do not need the w/a at all!); if we
	 * took care to make sure that on every switch into this context
	 * (both ordinary and for preemption) arbitration was enabled we
	 * would be fine. However, for gen8 there is another w/a that
	 * requires us to not preempt inside GPGPU execution, so we keep
	 * arbitration disabled for gen8 batches. Arbitration will be
	 * re-enabled before we close the request
	 * (engine->emit_fini_breadcrumb).
	 */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* FIXME(BDW+): Address space and security selectors. */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	intel_ring_advance(rq, cs);

	return 0;
}

int gen8_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       const unsigned int flags)
{
	u32 *cs;

	if (unlikely(i915_request_has_nopreempt(rq)))
		return gen8_emit_bb_start_noarb(rq, offset, len, flags);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

static void assert_request_valid(struct i915_request *rq)
{
	struct intel_ring *ring __maybe_unused = rq->ring;

	/* Can we unwind this request without appearing to go forwards? */
	GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
}

/*
 * Reserve space for 2 NOOPs at the end of each request to be
 * used as a workaround for not being allowed to do lite
 * restore with HEAD==TAIL (WaIdleLiteRestore).
 */
static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs)
{
	/* Ensure there's always at least one preemption point per-request. */
	*cs++ = MI_ARB_CHECK;
	*cs++ = MI_NOOP;
	rq->wa_tail = intel_ring_offset(rq, cs);

	/* Check that the entire request is less than half the ring */
	assert_request_valid(rq);

	return cs;
}

static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = preempt_address(rq->engine);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	return cs;
}

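/*
 * Close out the request: raise the user interrupt, re-enable arbitration
 * that was disabled around the batch, and, when driving execlists directly
 * (no GuC submission), park on the preemption semaphore so the request can
 * be held while the context-switch event is processed (preempt-to-busy).
 */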
static __always_inline u32*
gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_USER_INTERRUPT;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	if (intel_engine_has_semaphores(rq->engine) &&
	    !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
		cs = emit_preempt_busywait(rq, cs);

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}

static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}

u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}

u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	cs = gen8_emit_pipe_control(cs,
				    PIPE_CONTROL_CS_STALL |
				    PIPE_CONTROL_TLB_INVALIDATE |
				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
				    PIPE_CONTROL_DC_FLUSH_ENABLE,
				    0);

	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
	cs = gen8_emit_ggtt_write_rcs(cs,
				      rq->fence.seqno,
				      hwsp_offset(rq),
				      PIPE_CONTROL_FLUSH_ENABLE |
				      PIPE_CONTROL_CS_STALL);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	cs = gen8_emit_pipe_control(cs,
				    PIPE_CONTROL_CS_STALL |
				    PIPE_CONTROL_TLB_INVALIDATE |
				    PIPE_CONTROL_TILE_CACHE_FLUSH |
				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
				    PIPE_CONTROL_DC_FLUSH_ENABLE,
				    0);

	/* XXX: Look at gen8_emit_fini_breadcrumb_rcs */
	cs = gen8_emit_ggtt_write_rcs(cs,
				      rq->fence.seqno,
				      hwsp_offset(rq),
				      PIPE_CONTROL_FLUSH_ENABLE |
				      PIPE_CONTROL_CS_STALL);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

/*
 * Note that the CS instruction pre-parser will not stall on the breadcrumb
 * flush and will continue pre-fetching the instructions after it before the
 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
 * BB_START/END instructions, so, even though we might pre-fetch the preamble
 * of the next request before the memory has been flushed, we're guaranteed
 * that we won't access the batch itself too early.
 * However, on gen12+ the parser can pre-fetch across the BB_START/END
 * commands, so, if the current request is modifying an instruction in the
 * next request on the same intel_context, we might pre-fetch and then
 * execute the pre-update instruction. To avoid this, the users of
 * self-modifying code should either disable the parser around the code
 * emitting the memory writes, via the new flag added to MI_ARB_CHECK (see
 * preparser_disable() above), or emit the writes from a different
 * intel_context. For the in-kernel use-cases we've opted to use a separate
 * context, see reloc_gpu() as an example.
 * All the above applies only to the instructions themselves. Non-inline data
 * used by the instructions is not pre-fetched.
 */

static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = preempt_address(rq->engine);
	*cs++ = 0;
	*cs++ = 0;

	return cs;
}

/* Wa_14014475959:dg2 */
/* Wa_16019325821 */
/* Wa_14019159160 */
#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET	0x540
static u32 hold_switchout_semaphore_offset(struct i915_request *rq)
{
	return i915_ggtt_offset(rq->context->state) +
	       (LRC_PPHWSP_PN * PAGE_SIZE) + HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET;
}

/* Wa_14014475959:dg2 */
/* Wa_16019325821 */
/* Wa_14019159160 */
static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs)
{
	int i;

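	/*
	 * Atomically raise the hold-switchout semaphore in the PPHWSP, then
	 * spin until it is cleared back to zero externally once the context
	 * switch-out is allowed to complete.
	 */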
	*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
		MI_ATOMIC_MOVE;
	*cs++ = hold_switchout_semaphore_offset(rq);
	*cs++ = 0;
	*cs++ = 1;

	/*
	 * When MI_ATOMIC_INLINE_DATA is set, this command must be 11 DW + 1
	 * NOP to align: the 4 DWs above plus the 8 filler DWs here.
	 */
	for (i = 0; i < 8; ++i)
		*cs++ = 0;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = hold_switchout_semaphore_offset(rq);
	*cs++ = 0;

	return cs;
}

static __always_inline u32*
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_USER_INTERRUPT;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	if (intel_engine_has_semaphores(rq->engine) &&
	    !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
		cs = gen12_emit_preempt_busywait(rq, cs);

	/* Wa_14014475959:dg2 */
	/* Wa_16019325821 */
	/* Wa_14019159160 */
	if (intel_engine_uses_wa_hold_switchout(rq->engine))
		cs = hold_switchout_emit_wa_busywait(rq, cs);

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	/* XXX Stalling flush before seqno write; post-sync not */
	cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	struct drm_i915_private *i915 = rq->i915;
	struct intel_gt *gt = rq->engine->gt;
	u32 flags = (PIPE_CONTROL_CS_STALL |
		     PIPE_CONTROL_TLB_INVALIDATE |
		     PIPE_CONTROL_TILE_CACHE_FLUSH |
		     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		     PIPE_CONTROL_DC_FLUSH_ENABLE |
		     PIPE_CONTROL_FLUSH_ENABLE);

	if (GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70))
		flags |= PIPE_CONTROL_FLUSH_L3;

	/* Wa_14016712196 */
	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
		/* dummy PIPE_CONTROL + depth flush */
		cs = gen12_emit_pipe_control(cs, 0,
					     PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);

	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55))
		/* Wa_1409600907 */
		flags |= PIPE_CONTROL_DEPTH_STALL;

	if (!HAS_3D_PIPELINE(rq->i915))
		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
	else if (rq->engine->class == COMPUTE_CLASS)
		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

	cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);

	/* XXX: Look at gen8_emit_fini_breadcrumb_rcs */
	cs = gen12_emit_ggtt_write_rcs(cs,
				       rq->fence.seqno,
				       hwsp_offset(rq),
				       0,
				       PIPE_CONTROL_FLUSH_ENABLE |
				       PIPE_CONTROL_CS_STALL);

	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}