| 1 | // SPDX-License-Identifier: MIT | 
|---|
| 2 | /* | 
|---|
| 3 | * Copyright © 2014 Intel Corporation | 
|---|
| 4 | */ | 
|---|
| 5 |  | 
|---|
| 6 | #include "gem/i915_gem_internal.h" | 
|---|
| 7 |  | 
|---|
| 8 | #include "i915_drv.h" | 
|---|
| 9 | #include "intel_renderstate.h" | 
|---|
| 10 | #include "intel_context.h" | 
|---|
| 11 | #include "intel_gpu_commands.h" | 
|---|
| 12 | #include "intel_ring.h" | 
|---|
| 13 |  | 
|---|
| 14 | static const struct intel_renderstate_rodata * | 
|---|
| 15 | render_state_get_rodata(const struct intel_engine_cs *engine) | 
|---|
| 16 | { | 
|---|
| 17 | if (engine->class != RENDER_CLASS) | 
|---|
| 18 | return NULL; | 
|---|
| 19 |  | 
|---|
| 20 | switch (GRAPHICS_VER(engine->i915)) { | 
|---|
| 21 | case 6: | 
|---|
| 22 | return &gen6_null_state; | 
|---|
| 23 | case 7: | 
|---|
| 24 | return &gen7_null_state; | 
|---|
| 25 | case 8: | 
|---|
| 26 | return &gen8_null_state; | 
|---|
| 27 | case 9: | 
|---|
| 28 | return &gen9_null_state; | 
|---|
| 29 | } | 
|---|
| 30 |  | 
|---|
| 31 | return NULL; | 
|---|
| 32 | } | 
|---|
| 33 |  | 
|---|
| 34 | /* | 
|---|
| 35 | * Macro to add commands to auxiliary batch. | 
|---|
| 36 | * This macro only checks for page overflow before inserting the commands, | 
|---|
| 37 | * this is sufficient as the null state generator makes the final batch | 
|---|
| 38 | * with two passes to build command and state separately. At this point | 
|---|
| 39 | * the size of both are known and it compacts them by relocating the state | 
|---|
| 40 | * right after the commands taking care of alignment so we should have sufficient | 
|---|
| 41 | * space below them for adding new commands. | 
|---|
| 42 | */ | 
|---|
/*
 * Store @val at (batch)[i] and advance i, as long as i still indexes a dword
 * inside one page; otherwise jump to the enclosing function's "out" label.
 */
#define OUT_BATCH(batch, i, val)					\
	do {								\
		if ((i) < PAGE_SIZE / sizeof(u32))			\
			(batch)[(i)++] = (val);				\
		else							\
			goto out;					\
	} while (0)
|---|
| 49 |  | 
|---|
| 50 | static int render_state_setup(struct intel_renderstate *so, | 
|---|
| 51 | struct drm_i915_private *i915) | 
|---|
| 52 | { | 
|---|
| 53 | const struct intel_renderstate_rodata *rodata = so->rodata; | 
|---|
| 54 | unsigned int i = 0, reloc_index = 0; | 
|---|
| 55 | int ret = -EINVAL; | 
|---|
| 56 | u32 *d; | 
|---|
| 57 |  | 
|---|
| 58 | d = i915_gem_object_pin_map(obj: so->vma->obj, type: I915_MAP_WB); | 
|---|
| 59 | if (IS_ERR(ptr: d)) | 
|---|
| 60 | return PTR_ERR(ptr: d); | 
|---|
| 61 |  | 
|---|
| 62 | while (i < rodata->batch_items) { | 
|---|
| 63 | u32 s = rodata->batch[i]; | 
|---|
| 64 |  | 
|---|
| 65 | if (i * 4  == rodata->reloc[reloc_index]) { | 
|---|
| 66 | u64 r = s + i915_vma_offset(vma: so->vma); | 
|---|
| 67 |  | 
|---|
| 68 | s = lower_32_bits(r); | 
|---|
| 69 | if (HAS_64BIT_RELOC(i915)) { | 
|---|
| 70 | if (i + 1 >= rodata->batch_items || | 
|---|
| 71 | rodata->batch[i + 1] != 0) | 
|---|
| 72 | goto out; | 
|---|
| 73 |  | 
|---|
| 74 | d[i++] = s; | 
|---|
| 75 | s = upper_32_bits(r); | 
|---|
| 76 | } | 
|---|
| 77 |  | 
|---|
| 78 | reloc_index++; | 
|---|
| 79 | } | 
|---|
| 80 |  | 
|---|
| 81 | d[i++] = s; | 
|---|
| 82 | } | 
|---|
| 83 |  | 
|---|
| 84 | if (rodata->reloc[reloc_index] != -1) { | 
|---|
| 85 | drm_err(&i915->drm, "only %d relocs resolved\n", reloc_index); | 
|---|
| 86 | goto out; | 
|---|
| 87 | } | 
|---|
| 88 |  | 
|---|
| 89 | so->batch_offset = i915_ggtt_offset(vma: so->vma); | 
|---|
| 90 | so->batch_size = rodata->batch_items * sizeof(u32); | 
|---|
| 91 |  | 
|---|
| 92 | while (i % CACHELINE_DWORDS) | 
|---|
| 93 | OUT_BATCH(d, i, MI_NOOP); | 
|---|
| 94 |  | 
|---|
| 95 | so->aux_offset = i * sizeof(u32); | 
|---|
| 96 |  | 
|---|
| 97 | if (HAS_POOLED_EU(i915)) { | 
|---|
| 98 | /* | 
|---|
| 99 | * We always program 3x6 pool config but depending upon which | 
|---|
| 100 | * subslice is disabled HW drops down to appropriate config | 
|---|
| 101 | * shown below. | 
|---|
| 102 | * | 
|---|
| 103 | * In the below table 2x6 config always refers to | 
|---|
| 104 | * fused-down version, native 2x6 is not available and can | 
|---|
| 105 | * be ignored | 
|---|
| 106 | * | 
|---|
| 107 | * SNo  subslices config                eu pool configuration | 
|---|
| 108 | * ----------------------------------------------------------- | 
|---|
| 109 | * 1    3 subslices enabled (3x6)  -    0x00777000  (9+9) | 
|---|
| 110 | * 2    ss0 disabled (2x6)         -    0x00777000  (3+9) | 
|---|
| 111 | * 3    ss1 disabled (2x6)         -    0x00770000  (6+6) | 
|---|
| 112 | * 4    ss2 disabled (2x6)         -    0x00007000  (9+3) | 
|---|
| 113 | */ | 
|---|
| 114 | u32 eu_pool_config = 0x00777000; | 
|---|
| 115 |  | 
|---|
| 116 | OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE); | 
|---|
| 117 | OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE); | 
|---|
| 118 | OUT_BATCH(d, i, eu_pool_config); | 
|---|
| 119 | OUT_BATCH(d, i, 0); | 
|---|
| 120 | OUT_BATCH(d, i, 0); | 
|---|
| 121 | OUT_BATCH(d, i, 0); | 
|---|
| 122 | } | 
|---|
| 123 |  | 
|---|
| 124 | OUT_BATCH(d, i, MI_BATCH_BUFFER_END); | 
|---|
| 125 | so->aux_size = i * sizeof(u32) - so->aux_offset; | 
|---|
| 126 | so->aux_offset += so->batch_offset; | 
|---|
| 127 | /* | 
|---|
| 128 | * Since we are sending length, we need to strictly conform to | 
|---|
| 129 | * all requirements. For Gen2 this must be a multiple of 8. | 
|---|
| 130 | */ | 
|---|
| 131 | so->aux_size = ALIGN(so->aux_size, 8); | 
|---|
| 132 |  | 
|---|
| 133 | ret = 0; | 
|---|
| 134 | out: | 
|---|
| 135 | __i915_gem_object_flush_map(obj: so->vma->obj, offset: 0, size: i * sizeof(u32)); | 
|---|
| 136 | __i915_gem_object_release_map(obj: so->vma->obj); | 
|---|
| 137 | return ret; | 
|---|
| 138 | } | 
|---|
| 139 |  | 
|---|
| 140 | #undef OUT_BATCH | 
|---|
| 141 |  | 
|---|
| 142 | int intel_renderstate_init(struct intel_renderstate *so, | 
|---|
| 143 | struct intel_context *ce) | 
|---|
| 144 | { | 
|---|
| 145 | struct intel_engine_cs *engine = ce->engine; | 
|---|
| 146 | struct drm_i915_gem_object *obj = NULL; | 
|---|
| 147 | int err; | 
|---|
| 148 |  | 
|---|
| 149 | memset(s: so, c: 0, n: sizeof(*so)); | 
|---|
| 150 |  | 
|---|
| 151 | so->rodata = render_state_get_rodata(engine); | 
|---|
| 152 | if (so->rodata) { | 
|---|
| 153 | if (so->rodata->batch_items * 4 > PAGE_SIZE) | 
|---|
| 154 | return -EINVAL; | 
|---|
| 155 |  | 
|---|
| 156 | obj = i915_gem_object_create_internal(i915: engine->i915, PAGE_SIZE); | 
|---|
| 157 | if (IS_ERR(ptr: obj)) | 
|---|
| 158 | return PTR_ERR(ptr: obj); | 
|---|
| 159 |  | 
|---|
| 160 | so->vma = i915_vma_instance(obj, vm: &engine->gt->ggtt->vm, NULL); | 
|---|
| 161 | if (IS_ERR(ptr: so->vma)) { | 
|---|
| 162 | err = PTR_ERR(ptr: so->vma); | 
|---|
| 163 | goto err_obj; | 
|---|
| 164 | } | 
|---|
| 165 | } | 
|---|
| 166 |  | 
|---|
| 167 | i915_gem_ww_ctx_init(ctx: &so->ww, intr: true); | 
|---|
| 168 | retry: | 
|---|
| 169 | err = intel_context_pin_ww(ce, ww: &so->ww); | 
|---|
| 170 | if (err) | 
|---|
| 171 | goto err_fini; | 
|---|
| 172 |  | 
|---|
| 173 | /* return early if there's nothing to setup */ | 
|---|
| 174 | if (!err && !so->rodata) | 
|---|
| 175 | return 0; | 
|---|
| 176 |  | 
|---|
| 177 | err = i915_gem_object_lock(obj: so->vma->obj, ww: &so->ww); | 
|---|
| 178 | if (err) | 
|---|
| 179 | goto err_context; | 
|---|
| 180 |  | 
|---|
| 181 | err = i915_vma_pin_ww(vma: so->vma, ww: &so->ww, size: 0, alignment: 0, PIN_GLOBAL | PIN_HIGH); | 
|---|
| 182 | if (err) | 
|---|
| 183 | goto err_context; | 
|---|
| 184 |  | 
|---|
| 185 | err = render_state_setup(so, i915: engine->i915); | 
|---|
| 186 | if (err) | 
|---|
| 187 | goto err_unpin; | 
|---|
| 188 |  | 
|---|
| 189 | return 0; | 
|---|
| 190 |  | 
|---|
| 191 | err_unpin: | 
|---|
| 192 | i915_vma_unpin(vma: so->vma); | 
|---|
| 193 | err_context: | 
|---|
| 194 | intel_context_unpin(ce); | 
|---|
| 195 | err_fini: | 
|---|
| 196 | if (err == -EDEADLK) { | 
|---|
| 197 | err = i915_gem_ww_ctx_backoff(ctx: &so->ww); | 
|---|
| 198 | if (!err) | 
|---|
| 199 | goto retry; | 
|---|
| 200 | } | 
|---|
| 201 | i915_gem_ww_ctx_fini(ctx: &so->ww); | 
|---|
| 202 | err_obj: | 
|---|
| 203 | if (obj) | 
|---|
| 204 | i915_gem_object_put(obj); | 
|---|
| 205 | so->vma = NULL; | 
|---|
| 206 | return err; | 
|---|
| 207 | } | 
|---|
| 208 |  | 
|---|
| 209 | int intel_renderstate_emit(struct intel_renderstate *so, | 
|---|
| 210 | struct i915_request *rq) | 
|---|
| 211 | { | 
|---|
| 212 | struct intel_engine_cs *engine = rq->engine; | 
|---|
| 213 | int err; | 
|---|
| 214 |  | 
|---|
| 215 | if (!so->vma) | 
|---|
| 216 | return 0; | 
|---|
| 217 |  | 
|---|
| 218 | err = i915_vma_move_to_active(vma: so->vma, rq, flags: 0); | 
|---|
| 219 | if (err) | 
|---|
| 220 | return err; | 
|---|
| 221 |  | 
|---|
| 222 | err = engine->emit_bb_start(rq, | 
|---|
| 223 | so->batch_offset, so->batch_size, | 
|---|
| 224 | I915_DISPATCH_SECURE); | 
|---|
| 225 | if (err) | 
|---|
| 226 | return err; | 
|---|
| 227 |  | 
|---|
| 228 | if (so->aux_size > 8) { | 
|---|
| 229 | err = engine->emit_bb_start(rq, | 
|---|
| 230 | so->aux_offset, so->aux_size, | 
|---|
| 231 | I915_DISPATCH_SECURE); | 
|---|
| 232 | if (err) | 
|---|
| 233 | return err; | 
|---|
| 234 | } | 
|---|
| 235 |  | 
|---|
| 236 | return 0; | 
|---|
| 237 | } | 
|---|
| 238 |  | 
|---|
| 239 | void intel_renderstate_fini(struct intel_renderstate *so, | 
|---|
| 240 | struct intel_context *ce) | 
|---|
| 241 | { | 
|---|
| 242 | if (so->vma) { | 
|---|
| 243 | i915_vma_unpin(vma: so->vma); | 
|---|
| 244 | i915_vma_close(vma: so->vma); | 
|---|
| 245 | } | 
|---|
| 246 |  | 
|---|
| 247 | intel_context_unpin(ce); | 
|---|
| 248 | i915_gem_ww_ctx_fini(ctx: &so->ww); | 
|---|
| 249 |  | 
|---|
| 250 | if (so->vma) | 
|---|
| 251 | i915_gem_object_put(obj: so->vma->obj); | 
|---|
| 252 | } | 
|---|
| 253 |  | 
|---|