// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"

| 16 | int gen2_emit_flush(struct i915_request *rq, u32 mode) | 
|---|
| 17 | { | 
|---|
| 18 | unsigned int num_store_dw = 12; | 
|---|
| 19 | u32 cmd, *cs; | 
|---|
| 20 |  | 
|---|
| 21 | cmd = MI_FLUSH; | 
|---|
| 22 | if (mode & EMIT_INVALIDATE) | 
|---|
| 23 | cmd |= MI_READ_FLUSH; | 
|---|
| 24 |  | 
|---|
| 25 | cs = intel_ring_begin(rq, num_dwords: 2 + 4 * num_store_dw); | 
|---|
| 26 | if (IS_ERR(ptr: cs)) | 
|---|
| 27 | return PTR_ERR(ptr: cs); | 
|---|
| 28 |  | 
|---|
| 29 | *cs++ = cmd; | 
|---|
| 30 | while (num_store_dw--) { | 
|---|
| 31 | *cs++ = MI_STORE_DWORD_INDEX; | 
|---|
| 32 | *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32); | 
|---|
| 33 | *cs++ = 0; | 
|---|
| 34 | *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH; | 
|---|
| 35 | } | 
|---|
| 36 | *cs++ = cmd; | 
|---|
| 37 |  | 
|---|
| 38 | intel_ring_advance(rq, cs); | 
|---|
| 39 |  | 
|---|
| 40 | return 0; | 
|---|
| 41 | } | 
|---|
| 42 |  | 
|---|
| 43 | int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode) | 
|---|
| 44 | { | 
|---|
| 45 | u32 cmd, *cs; | 
|---|
| 46 | int i; | 
|---|
| 47 |  | 
|---|
| 48 | /* | 
|---|
| 49 | * read/write caches: | 
|---|
| 50 | * | 
|---|
| 51 | * I915_GEM_DOMAIN_RENDER is always invalidated, but is | 
|---|
| 52 | * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is | 
|---|
| 53 | * also flushed at 2d versus 3d pipeline switches. | 
|---|
| 54 | * | 
|---|
| 55 | * read-only caches: | 
|---|
| 56 | * | 
|---|
| 57 | * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if | 
|---|
| 58 | * MI_READ_FLUSH is set, and is always flushed on 965. | 
|---|
| 59 | * | 
|---|
| 60 | * I915_GEM_DOMAIN_COMMAND may not exist? | 
|---|
| 61 | * | 
|---|
| 62 | * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is | 
|---|
| 63 | * invalidated when MI_EXE_FLUSH is set. | 
|---|
| 64 | * | 
|---|
| 65 | * I915_GEM_DOMAIN_VERTEX, which exists on 965, is | 
|---|
| 66 | * invalidated with every MI_FLUSH. | 
|---|
| 67 | * | 
|---|
| 68 | * TLBs: | 
|---|
| 69 | * | 
|---|
| 70 | * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND | 
|---|
| 71 | * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and | 
|---|
| 72 | * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER | 
|---|
| 73 | * are flushed at any MI_FLUSH. | 
|---|
| 74 | */ | 
|---|
| 75 |  | 
|---|
| 76 | cmd = MI_FLUSH; | 
|---|
| 77 | if (mode & EMIT_INVALIDATE) { | 
|---|
| 78 | cmd |= MI_EXE_FLUSH; | 
|---|
| 79 | if (IS_G4X(rq->i915) || GRAPHICS_VER(rq->i915) == 5) | 
|---|
| 80 | cmd |= MI_INVALIDATE_ISP; | 
|---|
| 81 | } | 
|---|
| 82 |  | 
|---|
| 83 | i = 2; | 
|---|
| 84 | if (mode & EMIT_INVALIDATE) | 
|---|
| 85 | i += 20; | 
|---|
| 86 |  | 
|---|
| 87 | cs = intel_ring_begin(rq, num_dwords: i); | 
|---|
| 88 | if (IS_ERR(ptr: cs)) | 
|---|
| 89 | return PTR_ERR(ptr: cs); | 
|---|
| 90 |  | 
|---|
| 91 | *cs++ = cmd; | 
|---|
| 92 |  | 
|---|
| 93 | /* | 
|---|
| 94 | * A random delay to let the CS invalidate take effect? Without this | 
|---|
| 95 | * delay, the GPU relocation path fails as the CS does not see | 
|---|
| 96 | * the updated contents. Just as important, if we apply the flushes | 
|---|
| 97 | * to the EMIT_FLUSH branch (i.e. immediately after the relocation | 
|---|
| 98 | * write and before the invalidate on the next batch), the relocations | 
|---|
| 99 | * still fail. This implies that is a delay following invalidation | 
|---|
| 100 | * that is required to reset the caches as opposed to a delay to | 
|---|
| 101 | * ensure the memory is written. | 
|---|
| 102 | */ | 
|---|
| 103 | if (mode & EMIT_INVALIDATE) { | 
|---|
| 104 | *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; | 
|---|
| 105 | *cs++ = intel_gt_scratch_offset(gt: rq->engine->gt, | 
|---|
| 106 | field: INTEL_GT_SCRATCH_FIELD_DEFAULT) | | 
|---|
| 107 | PIPE_CONTROL_GLOBAL_GTT; | 
|---|
| 108 | *cs++ = 0; | 
|---|
| 109 | *cs++ = 0; | 
|---|
| 110 |  | 
|---|
| 111 | for (i = 0; i < 12; i++) | 
|---|
| 112 | *cs++ = MI_FLUSH; | 
|---|
| 113 |  | 
|---|
| 114 | *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; | 
|---|
| 115 | *cs++ = intel_gt_scratch_offset(gt: rq->engine->gt, | 
|---|
| 116 | field: INTEL_GT_SCRATCH_FIELD_DEFAULT) | | 
|---|
| 117 | PIPE_CONTROL_GLOBAL_GTT; | 
|---|
| 118 | *cs++ = 0; | 
|---|
| 119 | *cs++ = 0; | 
|---|
| 120 | } | 
|---|
| 121 |  | 
|---|
| 122 | *cs++ = cmd; | 
|---|
| 123 |  | 
|---|
| 124 | intel_ring_advance(rq, cs); | 
|---|
| 125 |  | 
|---|
| 126 | return 0; | 
|---|
| 127 | } | 
|---|
| 128 |  | 
|---|
| 129 | int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode) | 
|---|
| 130 | { | 
|---|
| 131 | u32 *cs; | 
|---|
| 132 |  | 
|---|
| 133 | cs = intel_ring_begin(rq, num_dwords: 2); | 
|---|
| 134 | if (IS_ERR(ptr: cs)) | 
|---|
| 135 | return PTR_ERR(ptr: cs); | 
|---|
| 136 |  | 
|---|
| 137 | *cs++ = MI_FLUSH; | 
|---|
| 138 | *cs++ = MI_NOOP; | 
|---|
| 139 | intel_ring_advance(rq, cs); | 
|---|
| 140 |  | 
|---|
| 141 | return 0; | 
|---|
| 142 | } | 
|---|
| 143 |  | 
|---|
| 144 | static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs, | 
|---|
| 145 | int flush, int post) | 
|---|
| 146 | { | 
|---|
| 147 | GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); | 
|---|
| 148 | GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR); | 
|---|
| 149 |  | 
|---|
| 150 | *cs++ = MI_FLUSH; | 
|---|
| 151 |  | 
|---|
| 152 | while (flush--) { | 
|---|
| 153 | *cs++ = MI_STORE_DWORD_INDEX; | 
|---|
| 154 | *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32); | 
|---|
| 155 | *cs++ = rq->fence.seqno; | 
|---|
| 156 | } | 
|---|
| 157 |  | 
|---|
| 158 | while (post--) { | 
|---|
| 159 | *cs++ = MI_STORE_DWORD_INDEX; | 
|---|
| 160 | *cs++ = I915_GEM_HWS_SEQNO_ADDR; | 
|---|
| 161 | *cs++ = rq->fence.seqno; | 
|---|
| 162 | } | 
|---|
| 163 |  | 
|---|
| 164 | *cs++ = MI_USER_INTERRUPT; | 
|---|
| 165 |  | 
|---|
| 166 | rq->tail = intel_ring_offset(rq, addr: cs); | 
|---|
| 167 | assert_ring_tail_valid(ring: rq->ring, tail: rq->tail); | 
|---|
| 168 |  | 
|---|
| 169 | return cs; | 
|---|
| 170 | } | 
|---|
| 171 |  | 
|---|
| 172 | u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs) | 
|---|
| 173 | { | 
|---|
| 174 | return __gen2_emit_breadcrumb(rq, cs, flush: 16, post: 8); | 
|---|
| 175 | } | 
|---|
| 176 |  | 
|---|
| 177 | u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) | 
|---|
| 178 | { | 
|---|
| 179 | return __gen2_emit_breadcrumb(rq, cs, flush: 8, post: 8); | 
|---|
| 180 | } | 
|---|
| 181 |  | 
|---|
| 182 | /* Just userspace ABI convention to limit the wa batch bo to a reasonable size */ | 
|---|
| 183 | #define I830_BATCH_LIMIT SZ_256K | 
|---|
| 184 | #define I830_TLB_ENTRIES (2) | 
|---|
| 185 | #define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT) | 
|---|
| 186 | int i830_emit_bb_start(struct i915_request *rq, | 
|---|
| 187 | u64 offset, u32 len, | 
|---|
| 188 | unsigned int dispatch_flags) | 
|---|
| 189 | { | 
|---|
| 190 | u32 *cs, cs_offset = | 
|---|
| 191 | intel_gt_scratch_offset(gt: rq->engine->gt, | 
|---|
| 192 | field: INTEL_GT_SCRATCH_FIELD_DEFAULT); | 
|---|
| 193 |  | 
|---|
| 194 | GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE); | 
|---|
| 195 |  | 
|---|
| 196 | cs = intel_ring_begin(rq, num_dwords: 6); | 
|---|
| 197 | if (IS_ERR(ptr: cs)) | 
|---|
| 198 | return PTR_ERR(ptr: cs); | 
|---|
| 199 |  | 
|---|
| 200 | /* Evict the invalid PTE TLBs */ | 
|---|
| 201 | *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; | 
|---|
| 202 | *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; | 
|---|
| 203 | *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ | 
|---|
| 204 | *cs++ = cs_offset; | 
|---|
| 205 | *cs++ = 0xdeadbeef; | 
|---|
| 206 | *cs++ = MI_NOOP; | 
|---|
| 207 | intel_ring_advance(rq, cs); | 
|---|
| 208 |  | 
|---|
| 209 | if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { | 
|---|
| 210 | if (len > I830_BATCH_LIMIT) | 
|---|
| 211 | return -ENOSPC; | 
|---|
| 212 |  | 
|---|
| 213 | cs = intel_ring_begin(rq, num_dwords: 6 + 2); | 
|---|
| 214 | if (IS_ERR(ptr: cs)) | 
|---|
| 215 | return PTR_ERR(ptr: cs); | 
|---|
| 216 |  | 
|---|
| 217 | /* | 
|---|
| 218 | * Blit the batch (which has now all relocs applied) to the | 
|---|
| 219 | * stable batch scratch bo area (so that the CS never | 
|---|
| 220 | * stumbles over its tlb invalidation bug) ... | 
|---|
| 221 | */ | 
|---|
| 222 | *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); | 
|---|
| 223 | *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; | 
|---|
| 224 | *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; | 
|---|
| 225 | *cs++ = cs_offset; | 
|---|
| 226 | *cs++ = 4096; | 
|---|
| 227 | *cs++ = offset; | 
|---|
| 228 |  | 
|---|
| 229 | *cs++ = MI_FLUSH; | 
|---|
| 230 | *cs++ = MI_NOOP; | 
|---|
| 231 | intel_ring_advance(rq, cs); | 
|---|
| 232 |  | 
|---|
| 233 | /* ... and execute it. */ | 
|---|
| 234 | offset = cs_offset; | 
|---|
| 235 | } | 
|---|
| 236 |  | 
|---|
| 237 | if (!(dispatch_flags & I915_DISPATCH_SECURE)) | 
|---|
| 238 | offset |= MI_BATCH_NON_SECURE; | 
|---|
| 239 |  | 
|---|
| 240 | cs = intel_ring_begin(rq, num_dwords: 2); | 
|---|
| 241 | if (IS_ERR(ptr: cs)) | 
|---|
| 242 | return PTR_ERR(ptr: cs); | 
|---|
| 243 |  | 
|---|
| 244 | *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; | 
|---|
| 245 | *cs++ = offset; | 
|---|
| 246 | intel_ring_advance(rq, cs); | 
|---|
| 247 |  | 
|---|
| 248 | return 0; | 
|---|
| 249 | } | 
|---|
| 250 |  | 
|---|
| 251 | int gen2_emit_bb_start(struct i915_request *rq, | 
|---|
| 252 | u64 offset, u32 len, | 
|---|
| 253 | unsigned int dispatch_flags) | 
|---|
| 254 | { | 
|---|
| 255 | u32 *cs; | 
|---|
| 256 |  | 
|---|
| 257 | if (!(dispatch_flags & I915_DISPATCH_SECURE)) | 
|---|
| 258 | offset |= MI_BATCH_NON_SECURE; | 
|---|
| 259 |  | 
|---|
| 260 | cs = intel_ring_begin(rq, num_dwords: 2); | 
|---|
| 261 | if (IS_ERR(ptr: cs)) | 
|---|
| 262 | return PTR_ERR(ptr: cs); | 
|---|
| 263 |  | 
|---|
| 264 | *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; | 
|---|
| 265 | *cs++ = offset; | 
|---|
| 266 | intel_ring_advance(rq, cs); | 
|---|
| 267 |  | 
|---|
| 268 | return 0; | 
|---|
| 269 | } | 
|---|
| 270 |  | 
|---|
| 271 | int gen4_emit_bb_start(struct i915_request *rq, | 
|---|
| 272 | u64 offset, u32 length, | 
|---|
| 273 | unsigned int dispatch_flags) | 
|---|
| 274 | { | 
|---|
| 275 | u32 security; | 
|---|
| 276 | u32 *cs; | 
|---|
| 277 |  | 
|---|
| 278 | security = MI_BATCH_NON_SECURE_I965; | 
|---|
| 279 | if (dispatch_flags & I915_DISPATCH_SECURE) | 
|---|
| 280 | security = 0; | 
|---|
| 281 |  | 
|---|
| 282 | cs = intel_ring_begin(rq, num_dwords: 2); | 
|---|
| 283 | if (IS_ERR(ptr: cs)) | 
|---|
| 284 | return PTR_ERR(ptr: cs); | 
|---|
| 285 |  | 
|---|
| 286 | *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security; | 
|---|
| 287 | *cs++ = offset; | 
|---|
| 288 | intel_ring_advance(rq, cs); | 
|---|
| 289 |  | 
|---|
| 290 | return 0; | 
|---|
| 291 | } | 
|---|
| 292 |  | 
|---|
| 293 | void gen2_irq_enable(struct intel_engine_cs *engine) | 
|---|
| 294 | { | 
|---|
| 295 | engine->i915->irq_mask &= ~engine->irq_enable_mask; | 
|---|
| 296 | intel_uncore_write(uncore: engine->uncore, GEN2_IMR, val: engine->i915->irq_mask); | 
|---|
| 297 | intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR); | 
|---|
| 298 | } | 
|---|
| 299 |  | 
|---|
| 300 | void gen2_irq_disable(struct intel_engine_cs *engine) | 
|---|
| 301 | { | 
|---|
| 302 | engine->i915->irq_mask |= engine->irq_enable_mask; | 
|---|
| 303 | intel_uncore_write(uncore: engine->uncore, GEN2_IMR, val: engine->i915->irq_mask); | 
|---|
| 304 | } | 
|---|
| 305 |  | 
|---|
| 306 | void gen5_irq_enable(struct intel_engine_cs *engine) | 
|---|
| 307 | { | 
|---|
| 308 | gen5_gt_enable_irq(gt: engine->gt, mask: engine->irq_enable_mask); | 
|---|
| 309 | } | 
|---|
| 310 |  | 
|---|
| 311 | void gen5_irq_disable(struct intel_engine_cs *engine) | 
|---|
| 312 | { | 
|---|
| 313 | gen5_gt_disable_irq(gt: engine->gt, mask: engine->irq_enable_mask); | 
|---|
| 314 | } | 
|---|
| 315 |  | 
|---|