intel_engine_pm.c source code [Linux/drivers/gpu/drm/i915/gt/intel_engine_pm.c]

1	// SPDX-License-Identifier: MIT
2	/*
3	* Copyright © 2019 Intel Corporation
4	*/
5
6	#include "i915_drv.h"
7
8	#include "intel_breadcrumbs.h"
9	#include "intel_context.h"
10	#include "intel_engine.h"
11	#include "intel_engine_heartbeat.h"
12	#include "intel_engine_pm.h"
13	#include "intel_gt.h"
14	#include "intel_gt_pm.h"
15	#include "intel_rc6.h"
16	#include "intel_ring.h"
17	#include "shmem_utils.h"
18	#include "intel_gt_regs.h"
19
20	static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine)
21	{
22	struct drm_i915_private *i915 = engine->i915;
23
24	if (MEDIA_VER(i915) >= `13` && engine->id == GSC0) {
25	intel_uncore_write(uncore: engine->gt->uncore,
26	RC_PSMI_CTRL_GSCCS,
27	_MASKED_BIT_DISABLE(IDLE_MSG_DISABLE));
28	/ hysteresis 0xA=5us as recommended in spec/
29	intel_uncore_write(uncore: engine->gt->uncore,
30	PWRCTX_MAXCNT_GSCCS,
31	val: `0xA`);
32	}
33	}
34
35	static void dbg_poison_ce(struct intel_context *ce)
36	{
37	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
38	return;
39
40	if (ce->state) {
41	struct drm_i915_gem_object *obj = ce->state->obj;
42	int type = intel_gt_coherent_map_type(gt: ce->engine->gt, obj, always_coherent: true);
43	void *map;
44
45	if (!i915_gem_object_trylock(obj, NULL))
46	return;
47
48	map = i915_gem_object_pin_map(obj, type);
49	if (!IS_ERR(ptr: map)) {
50	memset(s: map, CONTEXT_REDZONE, n: obj->base.size);
51	i915_gem_object_flush_map(obj);
52	i915_gem_object_unpin_map(obj);
53	}
54	i915_gem_object_unlock(obj);
55	}
56	}
57
58	static int __engine_unpark(struct intel_wakeref *wf)
59	{
60	struct intel_engine_cs *engine =
61	container_of(wf, typeof(*engine), wakeref);
62	struct intel_context *ce;
63
64	ENGINE_TRACE(engine, "\n");
65
66	engine->wakeref_track = intel_gt_pm_get(gt: engine->gt);
67
68	/ Discard stale context state from across idling /
69	ce = engine->kernel_context;
70	if (ce) {
71	GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));
72
73	/ Flush all pending HW writes before we touch the context /
74	while (unlikely(intel_context_inflight(ce)))
75	intel_engine_flush_submission(engine);
76
77	/ First poison the image to verify we never fully trust it /
78	dbg_poison_ce(ce);
79
80	/ Scrub the context image after our loss of control /
81	ce->ops->reset(ce);
82
83	CE_TRACE(ce, "reset { seqno:%x, *hwsp:%x, ring:%x }\n",
84	ce->timeline->seqno,
85	READ_ONCE(*ce->timeline->hwsp_seqno),
86	ce->ring->emit);
87	GEM_BUG_ON(ce->timeline->seqno !=
88	READ_ONCE(*ce->timeline->hwsp_seqno));
89	}
90
91	if (engine->unpark)
92	engine->unpark(engine);
93
94	intel_breadcrumbs_unpark(b: engine->breadcrumbs);
95	intel_engine_unpark_heartbeat(engine);
96	return `0`;
97	}
98
99	static void duration(struct dma_fence fence, struct* dma_fence_cb *cb)
100	{
101	struct i915_request *rq = to_request(fence);
102
103	ewma__engine_latency_add(e: &rq->engine->latency,
104	val: ktime_us_delta(later: rq->fence.timestamp,
105	earlier: rq->duration.emitted));
106	}
107
108	static void
109	__queue_and_release_pm(struct i915_request *rq,
110	struct intel_timeline *tl,
111	struct intel_engine_cs *engine)
112	{
113	struct intel_gt_timelines *timelines = &engine->gt->timelines;
114
115	ENGINE_TRACE(engine, "parking\n");
116
117	/*
118	* Open coded one half of intel_context_enter, which we have to omit
119	* here (see the large comment below) and because the other part must
120	* not be called due constructing directly with __i915_request_create
121	* which increments active count via intel_context_mark_active.
122	*/
123	GEM_BUG_ON(rq->context->active_count != `1`);
124	__intel_gt_pm_get(gt: engine->gt);
125	rq->context->wakeref = intel_wakeref_track(wf: &engine->gt->wakeref);
126
127	/*
128	* We have to serialise all potential retirement paths with our
129	* submission, as we don't want to underflow either the
130	* engine->wakeref.counter or our timeline->active_count.
131	*
132	* Equally, we cannot allow a new submission to start until
133	* after we finish queueing, nor could we allow that submitter
134	* to retire us before we are ready!
135	*/
136	spin_lock(lock: &timelines->lock);
137
138	/ Let intel_gt_retire_requests() retire us (acquired under lock) /
139	if (!atomic_fetch_inc(v: &tl->active_count))
140	list_add_tail(new: &tl->link, head: &timelines->active_list);
141
142	/ Hand the request over to HW and so engine_retire() /
143	__i915_request_queue_bh(rq);
144
145	/ Let new submissions commence (and maybe retire this timeline) /
146	__intel_wakeref_defer_park(wf: &engine->wakeref);
147
148	spin_unlock(lock: &timelines->lock);
149	}
150
151	static bool switch_to_kernel_context(struct intel_engine_cs *engine)
152	{
153	struct intel_context *ce = engine->kernel_context;
154	struct i915_request *rq;
155	bool result = true;
156
157	/*
158	* This is execlist specific behaviour intended to ensure the GPU is
159	* idle by switching to a known 'safe' context. With GuC submission, the
160	* same idle guarantee is achieved by other means (disabling
161	* scheduling). Further, switching to a 'safe' context has no effect
162	* with GuC submission as the scheduler can just switch back again.
163	*
164	* FIXME: Move this backend scheduler specific behaviour into the
165	* scheduler backend.
166	*/
167	if (intel_engine_uses_guc(engine))
168	return true;
169
170	/ GPU is pointing to the void, as good as in the kernel context. /
171	if (intel_gt_is_wedged(gt: engine->gt))
172	return true;
173
174	GEM_BUG_ON(!intel_context_is_barrier(ce));
175	GEM_BUG_ON(ce->timeline->hwsp_ggtt != engine->status_page.vma);
176
177	/ Already inside the kernel context, safe to power down. /
178	if (engine->wakeref_serial == engine->serial)
179	return true;
180
181	/*
182	* Note, we do this without taking the timeline->mutex. We cannot
183	* as we may be called while retiring the kernel context and so
184	* already underneath the timeline->mutex. Instead we rely on the
185	* exclusive property of the __engine_park that prevents anyone
186	* else from creating a request on this engine. This also requires
187	* that the ring is empty and we avoid any waits while constructing
188	* the context, as they assume protection by the timeline->mutex.
189	* This should hold true as we can only park the engine after
190	* retiring the last request, thus all rings should be empty and
191	* all timelines idle.
192	*
193	* For unlocking, there are 2 other parties and the GPU who have a
194	* stake here.
195	*
196	* A new gpu user will be waiting on the engine-pm to start their
197	* engine_unpark. New waiters are predicated on engine->wakeref.count
198	* and so intel_wakeref_defer_park() acts like a mutex_unlock of the
199	* engine->wakeref.
200	*
201	* The other party is intel_gt_retire_requests(), which is walking the
202	* list of active timelines looking for completions. Meanwhile as soon
203	* as we call __i915_request_queue(), the GPU may complete our request.
204	* Ergo, if we put ourselves on the timelines.active_list
205	* (se intel_timeline_enter()) before we increment the
206	* engine->wakeref.count, we may see the request completion and retire
207	* it causing an underflow of the engine->wakeref.
208	*/
209	set_bit(CONTEXT_IS_PARKING, addr: &ce->flags);
210	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < `0`);
211
212	rq = __i915_request_create(ce, GFP_NOWAIT);
213	if (IS_ERR(ptr: rq))
214	/ Context switch failed, hope for the best! Maybe reset? /
215	goto out_unlock;
216
217	/ Check again on the next retirement. /
218	engine->wakeref_serial = engine->serial + `1`;
219	i915_request_add_active_barriers(rq);
220
221	/ Install ourselves as a preemption barrier /
222	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
223	if (likely(!__i915_request_commit(rq))) { / engine should be idle! /
224	/*
225	* Use an interrupt for precise measurement of duration,
226	* otherwise we rely on someone else retiring all the requests
227	* which may delay the signaling (i.e. we will likely wait
228	* until the background request retirement running every
229	* second or two).
230	*/
231	BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
232	dma_fence_add_callback(fence: &rq->fence, cb: &rq->duration.cb, func: duration);
233	rq->duration.emitted = ktime_get();
234	}
235
236	/ Expose ourselves to the world /
237	__queue_and_release_pm(rq, tl: ce->timeline, engine);
238
239	result = false;
240	out_unlock:
241	clear_bit(CONTEXT_IS_PARKING, addr: &ce->flags);
242	return result;
243	}
244
245	static void call_idle_barriers(struct intel_engine_cs *engine)
246	{
247	struct llist_node node, next;
248
249	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
250	struct dma_fence_cb *cb =
251	container_of((struct list_head *)node,
252	typeof(*cb), node);
253
254	cb->func(ERR_PTR(error: -EAGAIN), cb);
255	}
256	}
257
258	static int __engine_park(struct intel_wakeref *wf)
259	{
260	struct intel_engine_cs *engine =
261	container_of(wf, typeof(*engine), wakeref);
262
263	engine->saturated = `0`;
264
265	/*
266	* If one and only one request is completed between pm events,
267	* we know that we are inside the kernel context and it is
268	* safe to power down. (We are paranoid in case that runtime
269	* suspend causes corruption to the active context image, and
270	* want to avoid that impacting userspace.)
271	*/
272	if (!switch_to_kernel_context(engine))
273	return -EBUSY;
274
275	ENGINE_TRACE(engine, "parked\n");
276
277	call_idle_barriers(engine); / cleanup after wedging /
278
279	intel_engine_park_heartbeat(engine);
280	intel_breadcrumbs_park(b: engine->breadcrumbs);
281
282	if (engine->park)
283	engine->park(engine);
284
285	/ While gt calls i915_vma_parked(), we have to break the lock cycle /
286	intel_gt_pm_put_async(gt: engine->gt, handle: engine->wakeref_track);
287	return `0`;
288	}
289
290	static const struct intel_wakeref_ops wf_ops = {
291	.get = __engine_unpark,
292	.put = __engine_park,
293	};
294
295	void intel_engine_init__pm(struct intel_engine_cs *engine)
296	{
297	intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops, engine->name);
298	intel_engine_init_heartbeat(engine);
299
300	intel_gsc_idle_msg_enable(engine);
301	}
302
303	/**
304	* intel_engine_reset_pinned_contexts - Reset the pinned contexts of
305	* an engine.
306	* @engine: The engine whose pinned contexts we want to reset.
307	*
308	* Typically the pinned context LMEM images lose or get their content
309	* corrupted on suspend. This function resets their images.
310	*/
311	void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine)
312	{
313	struct intel_context *ce;
314
315	list_for_each_entry(ce, &engine->pinned_contexts_list,
316	pinned_contexts_link) {
317	/ kernel context gets reset at __engine_unpark() /
318	if (ce == engine->kernel_context)
319	continue;
320
321	dbg_poison_ce(ce);
322	ce->ops->reset(ce);
323	}
324	}
325
326	#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
327	#include "selftest_engine_pm.c"
328	#endif
329

Browse the source code of Linux/drivers/gpu/drm/i915/gt/intel_engine_pm.c