1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2014 Intel Corporation
4 */
5
6 #include <linux/circ_buf.h>
7
8 #include "gem/i915_gem_context.h"
9 #include "gt/gen8_engine_cs.h"
10 #include "gt/intel_breadcrumbs.h"
11 #include "gt/intel_context.h"
12 #include "gt/intel_engine_pm.h"
13 #include "gt/intel_engine_heartbeat.h"
14 #include "gt/intel_gpu_commands.h"
15 #include "gt/intel_gt.h"
16 #include "gt/intel_gt_irq.h"
17 #include "gt/intel_gt_pm.h"
18 #include "gt/intel_gt_requests.h"
19 #include "gt/intel_lrc.h"
20 #include "gt/intel_lrc_reg.h"
21 #include "gt/intel_mocs.h"
22 #include "gt/intel_ring.h"
23
24 #include "intel_guc_submission.h"
25
26 #include "i915_drv.h"
27 #include "i915_trace.h"
28
29 /**
30 * DOC: GuC-based command submission
31 *
32 * The Scratch registers:
 * There are 16 MMIO-based registers starting at 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * The firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
39 *
40 * Command Transport buffers (CTBs):
41 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
42 * - G2H) are a message interface between the i915 and GuC.
43 *
44 * Context registration:
45 * Before a context can be submitted it must be registered with the GuC via a
46 * H2G. A unique guc_id is associated with each context. The context is either
47 * registered at request creation time (normal operation) or at submission time
48 * (abnormal operation, e.g. after a reset).
49 *
50 * Context submission:
51 * The i915 updates the LRC tail value in memory. The i915 must enable the
52 * scheduling of the context within the GuC for the GuC to actually consider it.
53 * Therefore, the first time a disabled context is submitted we use a schedule
54 * enable H2G, while follow up submissions are done via the context submit H2G,
55 * which informs the GuC that a previously enabled context has new work
56 * available.
57 *
58 * Context unpin:
59 * To unpin a context a H2G is used to disable scheduling. When the
60 * corresponding G2H returns indicating the scheduling disable operation has
61 * completed it is safe to unpin the context. While a disable is in flight it
62 * isn't safe to resubmit the context so a fence is used to stall all future
63 * requests of that context until the G2H is returned.
64 *
65 * Context deregistration:
66 * Before a context can be destroyed or if we steal its guc_id we must
67 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
68 * safe to submit anything to this guc_id until the deregister completes so a
69 * fence is used to stall all requests associated with this guc_id until the
70 * corresponding G2H returns indicating the guc_id has been deregistered.
71 *
72 * submission_state.guc_ids:
73 * Unique number associated with private GuC context data passed in during
74 * context registration / submission / deregistration. 64k available. Simple ida
75 * is used for allocation.
76 *
77 * Stealing guc_ids:
78 * If no guc_ids are available they can be stolen from another context at
79 * request creation time if that context is unpinned. If a guc_id can't be found
80 * we punt this problem to the user as we believe this is near impossible to hit
81 * during normal use cases.
82 *
83 * Locking:
84 * In the GuC submission code we have 3 basic spin locks which protect
85 * everything. Details about each below.
86 *
87 * sched_engine->lock
88 * This is the submission lock for all contexts that share an i915 schedule
89 * engine (sched_engine), thus only one of the contexts which share a
90 * sched_engine can be submitting at a time. Currently only one sched_engine is
91 * used for all of GuC submission but that could change in the future.
92 *
93 * guc->submission_state.lock
94 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
95 * list.
96 *
97 * ce->guc_state.lock
98 * Protects everything under ce->guc_state. Ensures that a context is in the
99 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
100 * on a disabled context (bad idea), we don't issue a schedule enable when a
101 * schedule disable is in flight, etc... Also protects list of inflight requests
102 * on the context and the priority management state. Lock is individual to each
103 * context.
104 *
105 * Lock ordering rules:
106 * sched_engine->lock -> ce->guc_state.lock
107 * guc->submission_state.lock -> ce->guc_state.lock
108 *
109 * Reset races:
110 * When a full GT reset is triggered it is assumed that some G2H responses to
111 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
112 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and clean up appropriately; however, this is quite racy.
115 * To avoid races, the reset code must disable submission before scrubbing for
116 * the missing G2H, while the submission code must check for submission being
117 * disabled and skip sending H2Gs and updating context states when it is. Both
118 * sides must also make sure to hold the relevant locks.
119 */
120
121 /* GuC Virtual Engine */
122 struct guc_virtual_engine {
123 struct intel_engine_cs base;
124 struct intel_context context;
125 };
126
127 static struct intel_context *
128 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
129 unsigned long flags);
130
131 static struct intel_context *
132 guc_create_parallel(struct intel_engine_cs **engines,
133 unsigned int num_siblings,
134 unsigned int width);
135
136 #define GUC_REQUEST_SIZE 64 /* bytes */
137
138 /*
139 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
140 * per the GuC submission interface. A different allocation algorithm is used
141 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
142 * partition the guc_id space. We believe the number of multi-lrc contexts in
143 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
144 * multi-lrc.
145 */
146 #define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16)
147
148 /*
149 * Below is a set of functions which control the GuC scheduling state which
150 * require a lock.
151 */
152 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0)
153 #define SCHED_STATE_DESTROYED BIT(1)
154 #define SCHED_STATE_PENDING_DISABLE BIT(2)
155 #define SCHED_STATE_BANNED BIT(3)
156 #define SCHED_STATE_ENABLED BIT(4)
157 #define SCHED_STATE_PENDING_ENABLE BIT(5)
158 #define SCHED_STATE_REGISTERED BIT(6)
159 #define SCHED_STATE_BLOCKED_SHIFT 7
160 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
161 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
162
static inline void init_sched_state(struct intel_context *ce)
164 {
165 lockdep_assert_held(&ce->guc_state.lock);
166 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
167 }
168
169 __maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
171 {
172 /*
173 * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after
174 * suspend.
175 */
	return !(ce->guc_state.sched_state &
		 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
178 }
179
180 static inline bool
context_wait_for_deregister_to_register(struct intel_context *ce)
182 {
183 return ce->guc_state.sched_state &
184 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
185 }
186
187 static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
189 {
190 lockdep_assert_held(&ce->guc_state.lock);
191 ce->guc_state.sched_state |=
192 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
193 }
194
195 static inline void
clr_context_wait_for_deregister_to_register(struct intel_context *ce)
197 {
198 lockdep_assert_held(&ce->guc_state.lock);
199 ce->guc_state.sched_state &=
200 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
201 }
202
203 static inline bool
context_destroyed(struct intel_context *ce)
205 {
206 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
207 }
208
209 static inline void
set_context_destroyed(struct intel_context *ce)
211 {
212 lockdep_assert_held(&ce->guc_state.lock);
213 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
214 }
215
static inline bool context_pending_disable(struct intel_context *ce)
217 {
218 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
219 }
220
static inline void set_context_pending_disable(struct intel_context *ce)
222 {
223 lockdep_assert_held(&ce->guc_state.lock);
224 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
225 }
226
static inline void clr_context_pending_disable(struct intel_context *ce)
228 {
229 lockdep_assert_held(&ce->guc_state.lock);
230 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
231 }
232
static inline bool context_banned(struct intel_context *ce)
234 {
235 return ce->guc_state.sched_state & SCHED_STATE_BANNED;
236 }
237
static inline void set_context_banned(struct intel_context *ce)
239 {
240 lockdep_assert_held(&ce->guc_state.lock);
241 ce->guc_state.sched_state |= SCHED_STATE_BANNED;
242 }
243
static inline void clr_context_banned(struct intel_context *ce)
245 {
246 lockdep_assert_held(&ce->guc_state.lock);
247 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
248 }
249
static inline bool context_enabled(struct intel_context *ce)
251 {
252 return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
253 }
254
static inline void set_context_enabled(struct intel_context *ce)
256 {
257 lockdep_assert_held(&ce->guc_state.lock);
258 ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
259 }
260
static inline void clr_context_enabled(struct intel_context *ce)
262 {
263 lockdep_assert_held(&ce->guc_state.lock);
264 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
265 }
266
static inline bool context_pending_enable(struct intel_context *ce)
268 {
269 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
270 }
271
static inline void set_context_pending_enable(struct intel_context *ce)
273 {
274 lockdep_assert_held(&ce->guc_state.lock);
275 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
276 }
277
static inline void clr_context_pending_enable(struct intel_context *ce)
279 {
280 lockdep_assert_held(&ce->guc_state.lock);
281 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
282 }
283
static inline bool context_registered(struct intel_context *ce)
285 {
286 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
287 }
288
static inline void set_context_registered(struct intel_context *ce)
290 {
291 lockdep_assert_held(&ce->guc_state.lock);
292 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
293 }
294
static inline void clr_context_registered(struct intel_context *ce)
296 {
297 lockdep_assert_held(&ce->guc_state.lock);
298 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
299 }
300
static inline u32 context_blocked(struct intel_context *ce)
302 {
303 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
304 SCHED_STATE_BLOCKED_SHIFT;
305 }
306
static inline void incr_context_blocked(struct intel_context *ce)
308 {
309 lockdep_assert_held(&ce->guc_state.lock);
310
311 ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
312
313 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */
314 }
315
static inline void decr_context_blocked(struct intel_context *ce)
317 {
318 lockdep_assert_held(&ce->guc_state.lock);
319
320 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */
321
322 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
323 }
324
static inline bool context_has_committed_requests(struct intel_context *ce)
326 {
327 return !!ce->guc_state.number_committed_requests;
328 }
329
static inline void incr_context_committed_requests(struct intel_context *ce)
331 {
332 lockdep_assert_held(&ce->guc_state.lock);
333 ++ce->guc_state.number_committed_requests;
334 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
335 }
336
static inline void decr_context_committed_requests(struct intel_context *ce)
338 {
339 lockdep_assert_held(&ce->guc_state.lock);
340 --ce->guc_state.number_committed_requests;
341 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
342 }
343
344 static struct intel_context *
request_to_scheduling_context(struct i915_request *rq)
346 {
347 return intel_context_to_parent(rq->context);
348 }
349
static inline bool context_guc_id_invalid(struct intel_context *ce)
351 {
352 return ce->guc_id.id == GUC_INVALID_LRC_ID;
353 }
354
static inline void set_context_guc_id_invalid(struct intel_context *ce)
356 {
357 ce->guc_id.id = GUC_INVALID_LRC_ID;
358 }
359
static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
361 {
362 return &ce->engine->gt->uc.guc;
363 }
364
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
366 {
367 return rb_entry(rb, struct i915_priolist, node);
368 }
369
370 /*
371 * When using multi-lrc submission a scratch memory area is reserved in the
372 * parent's context state for the process descriptor, work queue, and handshake
373 * between the parent + children contexts to insert safe preemption points
374 * between each of the BBs. Currently the scratch area is sized to a page.
375 *
376 * The layout of this scratch area is below:
377 * 0 guc_process_desc
378 * + sizeof(struct guc_process_desc) child go
379 * + CACHELINE_BYTES child join[0]
380 * ...
381 * + CACHELINE_BYTES child join[n - 1]
382 * ... unused
383 * PARENT_SCRATCH_SIZE / 2 work queue start
384 * ... work queue
385 * PARENT_SCRATCH_SIZE - 1 work queue end
386 */
387 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2)
388 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE)
389
390 struct sync_semaphore {
391 u32 semaphore;
392 u8 unused[CACHELINE_BYTES - sizeof(u32)];
393 };
394
395 struct parent_scratch {
396 struct guc_process_desc pdesc;
397
398 struct sync_semaphore go;
399 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
400
401 u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) -
402 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
403
404 u32 wq[WQ_SIZE / sizeof(u32)];
405 };
406
static u32 __get_parent_scratch_offset(struct intel_context *ce)
408 {
409 GEM_BUG_ON(!ce->parallel.guc.parent_page);
410
411 return ce->parallel.guc.parent_page * PAGE_SIZE;
412 }
413
static u32 __get_wq_offset(struct intel_context *ce)
415 {
416 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
417
418 return __get_parent_scratch_offset(ce) + WQ_OFFSET;
419 }
420
421 static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
423 {
424 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
425 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
426
427 /*
428 * Need to subtract LRC_STATE_OFFSET here as the
429 * parallel.guc.parent_page is the offset into ce->state while
430 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET.
431 */
432 return (struct parent_scratch *)
433 (ce->lrc_reg_state +
434 ((__get_parent_scratch_offset(ce) -
435 LRC_STATE_OFFSET) / sizeof(u32)));
436 }
437
438 static struct guc_process_desc *
__get_process_desc(struct intel_context *ce)
440 {
441 struct parent_scratch *ps = __get_parent_scratch(ce);
442
443 return &ps->pdesc;
444 }
445
static u32 *get_wq_pointer(struct guc_process_desc *desc,
447 struct intel_context *ce,
448 u32 wqi_size)
449 {
450 /*
	 * Check for space in the work queue. We cache a value of the head
	 * pointer in the intel_context structure in order to reduce the
	 * number of accesses to shared GPU memory, which may be across a
	 * PCIe bus.
454 */
455 #define AVAILABLE_SPACE \
456 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
457 if (wqi_size > AVAILABLE_SPACE) {
458 ce->parallel.guc.wqi_head = READ_ONCE(desc->head);
459
460 if (wqi_size > AVAILABLE_SPACE)
461 return NULL;
462 }
463 #undef AVAILABLE_SPACE
464
465 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
466 }
467
static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index)
469 {
470 struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr;
471
472 GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS);
473
474 return &base[index];
475 }
476
static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
478 {
479 struct intel_context *ce = xa_load(&guc->context_lookup, id);
480
481 GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS);
482
483 return ce;
484 }
485
static int guc_lrc_desc_pool_create(struct intel_guc *guc)
487 {
488 u32 size;
489 int ret;
490
491 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) *
492 GUC_MAX_LRC_DESCRIPTORS);
493 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool,
494 (void **)&guc->lrc_desc_pool_vaddr);
495 if (ret)
496 return ret;
497
498 return 0;
499 }
500
static void guc_lrc_desc_pool_destroy(struct intel_guc *guc)
502 {
503 guc->lrc_desc_pool_vaddr = NULL;
504 i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP);
505 }
506
static inline bool guc_submission_initialized(struct intel_guc *guc)
508 {
509 return !!guc->lrc_desc_pool_vaddr;
510 }
511
static inline void reset_lrc_desc(struct intel_guc *guc, u32 id)
513 {
514 if (likely(guc_submission_initialized(guc))) {
515 struct guc_lrc_desc *desc = __get_lrc_desc(guc, id);
516 unsigned long flags;
517
518 memset(desc, 0, sizeof(*desc));
519
520 /*
521 * xarray API doesn't have xa_erase_irqsave wrapper, so calling
522 * the lower level functions directly.
523 */
524 xa_lock_irqsave(&guc->context_lookup, flags);
525 __xa_erase(&guc->context_lookup, id);
526 xa_unlock_irqrestore(&guc->context_lookup, flags);
527 }
528 }
529
static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id)
531 {
532 return __get_context(guc, id);
533 }
534
static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
536 struct intel_context *ce)
537 {
538 unsigned long flags;
539
540 /*
541 * xarray API doesn't have xa_save_irqsave wrapper, so calling the
542 * lower level functions directly.
543 */
544 xa_lock_irqsave(&guc->context_lookup, flags);
545 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
546 xa_unlock_irqrestore(&guc->context_lookup, flags);
547 }
548
static void decr_outstanding_submission_g2h(struct intel_guc *guc)
550 {
551 if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
552 wake_up_all(&guc->ct.wq);
553 }
554
static int guc_submission_send_busy_loop(struct intel_guc *guc,
556 const u32 *action,
557 u32 len,
558 u32 g2h_len_dw,
559 bool loop)
560 {
561 /*
562 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
563 * so we don't handle the case where we don't get a reply because we
564 * aborted the send due to the channel being busy.
565 */
566 GEM_BUG_ON(g2h_len_dw && !loop);
567
568 if (g2h_len_dw)
569 atomic_inc(&guc->outstanding_submission_g2h);
570
571 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
572 }
573
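/**
 * intel_guc_wait_for_pending_msg - wait for a G2H wait variable to drop to zero
 * @guc: the GuC
 * @wait_var: atomic counter to wait on (e.g. outstanding submission G2H)
 * @interruptible: use an interruptible sleep if true
 * @timeout: timeout in jiffies, must be non-negative
 *
 * Sleeps on guc->ct.wq until @wait_var reads zero. Returns 0 on success,
 * -ETIME if the timeout expires first or -EINTR if interrupted by a signal.
 */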
int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
575 atomic_t *wait_var,
576 bool interruptible,
577 long timeout)
578 {
579 const int state = interruptible ?
580 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
581 DEFINE_WAIT(wait);
582
583 might_sleep();
584 GEM_BUG_ON(timeout < 0);
585
586 if (!atomic_read(wait_var))
587 return 0;
588
589 if (!timeout)
590 return -ETIME;
591
592 for (;;) {
593 prepare_to_wait(&guc->ct.wq, &wait, state);
594
595 if (!atomic_read(wait_var))
596 break;
597
598 if (signal_pending_state(state, current)) {
599 timeout = -EINTR;
600 break;
601 }
602
603 if (!timeout) {
604 timeout = -ETIME;
605 break;
606 }
607
608 timeout = io_schedule_timeout(timeout);
609 }
610 finish_wait(&guc->ct.wq, &wait);
611
612 return (timeout < 0) ? timeout : 0;
613 }
614
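/**
 * intel_guc_wait_for_idle - wait for outstanding submission G2H to complete
 * @guc: the GuC
 * @timeout: timeout in jiffies
 *
 * Returns 0 immediately if GuC submission is not in use, otherwise waits
 * interruptibly for guc->outstanding_submission_g2h to drain to zero.
 */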
int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
616 {
617 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
618 return 0;
619
620 return intel_guc_wait_for_pending_msg(guc,
621 &guc->outstanding_submission_g2h,
622 true, timeout);
623 }
624
625 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop);
626
static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
628 {
629 int err = 0;
630 struct intel_context *ce = request_to_scheduling_context(rq);
631 u32 action[3];
632 int len = 0;
633 u32 g2h_len_dw = 0;
634 bool enabled;
635
636 lockdep_assert_held(&rq->engine->sched_engine->lock);
637
638 /*
	 * Corner case where requests were sitting in the priority list or a
	 * request was resubmitted after the context was banned.
641 */
642 if (unlikely(intel_context_is_banned(ce))) {
643 i915_request_put(i915_request_mark_eio(rq));
644 intel_engine_signal_breadcrumbs(ce->engine);
645 return 0;
646 }
647
648 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
649 GEM_BUG_ON(context_guc_id_invalid(ce));
650
651 spin_lock(&ce->guc_state.lock);
652
653 /*
654 * The request / context will be run on the hardware when scheduling
655 * gets enabled in the unblock. For multi-lrc we still submit the
656 * context to move the LRC tails.
657 */
658 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
659 goto out;
660
661 enabled = context_enabled(ce) || context_blocked(ce);
662
663 if (!enabled) {
664 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
665 action[len++] = ce->guc_id.id;
666 action[len++] = GUC_CONTEXT_ENABLE;
667 set_context_pending_enable(ce);
668 intel_context_get(ce);
669 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
670 } else {
671 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
672 action[len++] = ce->guc_id.id;
673 }
674
675 err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
676 if (!enabled && !err) {
677 trace_intel_context_sched_enable(ce);
678 atomic_inc(&guc->outstanding_submission_g2h);
679 set_context_enabled(ce);
680
681 /*
682 * Without multi-lrc KMD does the submission step (moving the
683 * lrc tail) so enabling scheduling is sufficient to submit the
684 * context. This isn't the case in multi-lrc submission as the
685 * GuC needs to move the tails, hence the need for another H2G
686 * to submit a multi-lrc context after enabling scheduling.
687 */
688 if (intel_context_is_parent(ce)) {
689 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
690 err = intel_guc_send_nb(guc, action, len - 1, 0);
691 }
692 } else if (!enabled) {
693 clr_context_pending_enable(ce);
694 intel_context_put(ce);
695 }
696 if (likely(!err))
697 trace_i915_request_guc_submit(rq);
698
699 out:
700 spin_unlock(&ce->guc_state.lock);
701 return err;
702 }
703
static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
705 {
706 int ret = __guc_add_request(guc, rq);
707
708 if (unlikely(ret == -EBUSY)) {
709 guc->stalled_request = rq;
710 guc->submission_stall_reason = STALL_ADD_REQUEST;
711 }
712
713 return ret;
714 }
715
static inline void guc_set_lrc_tail(struct i915_request *rq)
717 {
718 rq->context->lrc_reg_state[CTX_RING_TAIL] =
719 intel_ring_set_tail(rq->ring, rq->tail);
720 }
721
static inline int rq_prio(const struct i915_request *rq)
723 {
724 return rq->sched.attr.priority;
725 }
726
static bool is_multi_lrc_rq(struct i915_request *rq)
728 {
729 return intel_context_is_parallel(rq->context);
730 }
731
static bool can_merge_rq(struct i915_request *rq,
733 struct i915_request *last)
734 {
735 return request_to_scheduling_context(rq) ==
736 request_to_scheduling_context(last);
737 }
738
static u32 wq_space_until_wrap(struct intel_context *ce)
740 {
741 return (WQ_SIZE - ce->parallel.guc.wqi_tail);
742 }
743
static void write_wqi(struct guc_process_desc *desc,
745 struct intel_context *ce,
746 u32 wqi_size)
747 {
748 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
749
750 /*
751 * Ensure WQI are visible before updating tail
752 */
753 intel_guc_write_barrier(ce_to_guc(ce));
754
755 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
756 (WQ_SIZE - 1);
757 WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail);
758 }
759
static int guc_wq_noop_append(struct intel_context *ce)
761 {
762 struct guc_process_desc *desc = __get_process_desc(ce);
763 u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce));
764 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
765
766 if (!wqi)
767 return -EBUSY;
768
769 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
770
771 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
772 FIELD_PREP(WQ_LEN_MASK, len_dw);
773 ce->parallel.guc.wqi_tail = 0;
774
775 return 0;
776 }
777
static int __guc_wq_item_append(struct i915_request *rq)
779 {
780 struct intel_context *ce = request_to_scheduling_context(rq);
781 struct intel_context *child;
782 struct guc_process_desc *desc = __get_process_desc(ce);
783 unsigned int wqi_size = (ce->parallel.number_children + 4) *
784 sizeof(u32);
785 u32 *wqi;
786 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
787 int ret;
788
	/* Ensure context is in correct state before updating the work queue */
790 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
791 GEM_BUG_ON(context_guc_id_invalid(ce));
792 GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
793 GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id));
794
795 /* Insert NOOP if this work queue item will wrap the tail pointer. */
796 if (wqi_size > wq_space_until_wrap(ce)) {
797 ret = guc_wq_noop_append(ce);
798 if (ret)
799 return ret;
800 }
801
802 wqi = get_wq_pointer(desc, ce, wqi_size);
803 if (!wqi)
804 return -EBUSY;
805
806 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
807
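	/*
	 * Multi-LRC work queue item layout: a header dword (type + length),
	 * the parent's LRCA, the guc_id plus the parent ring tail, a fence_id
	 * and then one ring tail dword per child context.
	 */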
808 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
809 FIELD_PREP(WQ_LEN_MASK, len_dw);
810 *wqi++ = ce->lrc.lrca;
811 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
812 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
813 *wqi++ = 0; /* fence_id */
814 for_each_child(ce, child)
815 *wqi++ = child->ring->tail / sizeof(u64);
816
817 write_wqi(desc, ce, wqi_size);
818
819 return 0;
820 }
821
static int guc_wq_item_append(struct intel_guc *guc,
823 struct i915_request *rq)
824 {
825 struct intel_context *ce = request_to_scheduling_context(rq);
826 int ret = 0;
827
828 if (likely(!intel_context_is_banned(ce))) {
829 ret = __guc_wq_item_append(rq);
830
831 if (unlikely(ret == -EBUSY)) {
832 guc->stalled_request = rq;
833 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
834 }
835 }
836
837 return ret;
838 }
839
static bool multi_lrc_submit(struct i915_request *rq)
841 {
842 struct intel_context *ce = request_to_scheduling_context(rq);
843
844 intel_ring_set_tail(rq->ring, rq->tail);
845
846 /*
847 * We expect the front end (execbuf IOCTL) to set this flag on the last
848 * request generated from a multi-BB submission. This indicates to the
849 * backend (GuC interface) that we should submit this context thus
850 * submitting all the requests generated in parallel.
851 */
852 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
853 intel_context_is_banned(ce);
854 }
855
static int guc_dequeue_one_context(struct intel_guc *guc)
857 {
858 struct i915_sched_engine * const sched_engine = guc->sched_engine;
859 struct i915_request *last = NULL;
860 bool submit = false;
861 struct rb_node *rb;
862 int ret;
863
864 lockdep_assert_held(&sched_engine->lock);
865
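	/*
	 * A previous pass stalled part way through (registering the context,
	 * appending the WQ item or adding the request); resume from the
	 * recorded step rather than starting over.
	 */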
866 if (guc->stalled_request) {
867 submit = true;
868 last = guc->stalled_request;
869
870 switch (guc->submission_stall_reason) {
871 case STALL_REGISTER_CONTEXT:
872 goto register_context;
873 case STALL_MOVE_LRC_TAIL:
874 goto move_lrc_tail;
875 case STALL_ADD_REQUEST:
876 goto add_request;
877 default:
878 MISSING_CASE(guc->submission_stall_reason);
879 }
880 }
881
882 while ((rb = rb_first_cached(&sched_engine->queue))) {
883 struct i915_priolist *p = to_priolist(rb);
884 struct i915_request *rq, *rn;
885
886 priolist_for_each_request_consume(rq, rn, p) {
887 if (last && !can_merge_rq(rq, last))
888 goto register_context;
889
890 list_del_init(&rq->sched.link);
891
892 __i915_request_submit(rq);
893
894 trace_i915_request_in(rq, 0);
895 last = rq;
896
897 if (is_multi_lrc_rq(rq)) {
898 /*
899 * We need to coalesce all multi-lrc requests in
900 * a relationship into a single H2G. We are
901 * guaranteed that all of these requests will be
902 * submitted sequentially.
903 */
904 if (multi_lrc_submit(rq)) {
905 submit = true;
906 goto register_context;
907 }
908 } else {
909 submit = true;
910 }
911 }
912
913 rb_erase_cached(&p->node, &sched_engine->queue);
914 i915_priolist_free(p);
915 }
916
917 register_context:
918 if (submit) {
919 struct intel_context *ce = request_to_scheduling_context(last);
920
921 if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) &&
922 !intel_context_is_banned(ce))) {
923 ret = guc_lrc_desc_pin(ce, false);
924 if (unlikely(ret == -EPIPE)) {
925 goto deadlk;
926 } else if (ret == -EBUSY) {
927 guc->stalled_request = last;
928 guc->submission_stall_reason =
929 STALL_REGISTER_CONTEXT;
930 goto schedule_tasklet;
931 } else if (ret != 0) {
932 GEM_WARN_ON(ret); /* Unexpected */
933 goto deadlk;
934 }
935 }
936
937 move_lrc_tail:
938 if (is_multi_lrc_rq(last)) {
939 ret = guc_wq_item_append(guc, last);
940 if (ret == -EBUSY) {
941 goto schedule_tasklet;
942 } else if (ret != 0) {
943 GEM_WARN_ON(ret); /* Unexpected */
944 goto deadlk;
945 }
946 } else {
947 guc_set_lrc_tail(last);
948 }
949
950 add_request:
951 ret = guc_add_request(guc, last);
952 if (unlikely(ret == -EPIPE)) {
953 goto deadlk;
954 } else if (ret == -EBUSY) {
955 goto schedule_tasklet;
956 } else if (ret != 0) {
957 GEM_WARN_ON(ret); /* Unexpected */
958 goto deadlk;
959 }
960 }
961
962 guc->stalled_request = NULL;
963 guc->submission_stall_reason = STALL_NONE;
964 return submit;
965
966 deadlk:
967 sched_engine->tasklet.callback = NULL;
968 tasklet_disable_nosync(&sched_engine->tasklet);
969 return false;
970
971 schedule_tasklet:
972 tasklet_schedule(&sched_engine->tasklet);
973 return false;
974 }
975
static void guc_submission_tasklet(struct tasklet_struct *t)
977 {
978 struct i915_sched_engine *sched_engine =
979 from_tasklet(sched_engine, t, tasklet);
980 unsigned long flags;
981 bool loop;
982
983 spin_lock_irqsave(&sched_engine->lock, flags);
984
985 do {
986 loop = guc_dequeue_one_context(sched_engine->private_data);
987 } while (loop);
988
989 i915_sched_engine_reset_on_empty(sched_engine);
990
991 spin_unlock_irqrestore(&sched_engine->lock, flags);
992 }
993
static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
995 {
996 if (iir & GT_RENDER_USER_INTERRUPT)
997 intel_engine_signal_breadcrumbs(engine);
998 }
999
1000 static void __guc_context_destroy(struct intel_context *ce);
1001 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
1002 static void guc_signal_context_fence(struct intel_context *ce);
1003 static void guc_cancel_context_requests(struct intel_context *ce);
1004 static void guc_blocked_fence_complete(struct intel_context *ce);
1005
static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
1007 {
1008 struct intel_context *ce;
1009 unsigned long index, flags;
1010 bool pending_disable, pending_enable, deregister, destroyed, banned;
1011
1012 xa_lock_irqsave(&guc->context_lookup, flags);
1013 xa_for_each(&guc->context_lookup, index, ce) {
1014 /*
		 * Corner case where the ref count on the object is zero but a
		 * deregister G2H was lost. In this case we don't touch the ref
		 * count and finish the destroy of the context.
1018 */
1019 bool do_put = kref_get_unless_zero(&ce->ref);
1020
1021 xa_unlock(&guc->context_lookup);
1022
1023 spin_lock(&ce->guc_state.lock);
1024
1025 /*
1026 * Once we are at this point submission_disabled() is guaranteed
1027 * to be visible to all callers who set the below flags (see above
1028 * flush and flushes in reset_prepare). If submission_disabled()
1029 * is set, the caller shouldn't set these flags.
1030 */
1031
1032 destroyed = context_destroyed(ce);
1033 pending_enable = context_pending_enable(ce);
1034 pending_disable = context_pending_disable(ce);
1035 deregister = context_wait_for_deregister_to_register(ce);
1036 banned = context_banned(ce);
1037 init_sched_state(ce);
1038
1039 spin_unlock(&ce->guc_state.lock);
1040
1041 GEM_BUG_ON(!do_put && !destroyed);
1042
1043 if (pending_enable || destroyed || deregister) {
1044 decr_outstanding_submission_g2h(guc);
1045 if (deregister)
1046 guc_signal_context_fence(ce);
1047 if (destroyed) {
1048 intel_gt_pm_put_async(guc_to_gt(guc));
1049 release_guc_id(guc, ce);
1050 __guc_context_destroy(ce);
1051 }
1052 if (pending_enable || deregister)
1053 intel_context_put(ce);
1054 }
1055
		/* Not mutually exclusive with the above if statement. */
1057 if (pending_disable) {
1058 guc_signal_context_fence(ce);
1059 if (banned) {
1060 guc_cancel_context_requests(ce);
1061 intel_engine_signal_breadcrumbs(ce->engine);
1062 }
1063 intel_context_sched_disable_unpin(ce);
1064 decr_outstanding_submission_g2h(guc);
1065
1066 spin_lock(&ce->guc_state.lock);
1067 guc_blocked_fence_complete(ce);
1068 spin_unlock(&ce->guc_state.lock);
1069
1070 intel_context_put(ce);
1071 }
1072
1073 if (do_put)
1074 intel_context_put(ce);
1075 xa_lock(&guc->context_lookup);
1076 }
1077 xa_unlock_irqrestore(&guc->context_lookup, flags);
1078 }
1079
1080 static inline bool
submission_disabled(struct intel_guc *guc)
1082 {
1083 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1084
1085 return unlikely(!sched_engine ||
1086 !__tasklet_is_enabled(&sched_engine->tasklet));
1087 }
1088
static void disable_submission(struct intel_guc *guc)
1090 {
1091 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1092
1093 if (__tasklet_is_enabled(&sched_engine->tasklet)) {
1094 GEM_BUG_ON(!guc->ct.enabled);
1095 __tasklet_disable_sync_once(&sched_engine->tasklet);
1096 sched_engine->tasklet.callback = NULL;
1097 }
1098 }
1099
static void enable_submission(struct intel_guc *guc)
1101 {
1102 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1103 unsigned long flags;
1104
1105 spin_lock_irqsave(&guc->sched_engine->lock, flags);
1106 sched_engine->tasklet.callback = guc_submission_tasklet;
1107 wmb(); /* Make sure callback visible */
1108 if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
1109 __tasklet_enable(&sched_engine->tasklet)) {
1110 GEM_BUG_ON(!guc->ct.enabled);
1111
1112 /* And kick in case we missed a new request submission. */
1113 tasklet_hi_schedule(&sched_engine->tasklet);
1114 }
1115 spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
1116 }
1117
static void guc_flush_submissions(struct intel_guc *guc)
1119 {
1120 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1121 unsigned long flags;
1122
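	/*
	 * Taking and immediately releasing the submission lock ensures that
	 * any submitter currently inside the critical section has finished.
	 */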
1123 spin_lock_irqsave(&sched_engine->lock, flags);
1124 spin_unlock_irqrestore(&sched_engine->lock, flags);
1125 }
1126
1127 static void guc_flush_destroyed_contexts(struct intel_guc *guc);
1128
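/**
 * intel_guc_submission_reset_prepare - prepare GuC submission for a GT reset
 * @guc: the GuC
 *
 * Parks the heartbeats, disables submission and GuC interrupts, flushes the
 * IRQ handler and in-flight submissions, then processes any outstanding G2H
 * messages and scrubs context state for G2H that may have been lost.
 */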
void intel_guc_submission_reset_prepare(struct intel_guc *guc)
1130 {
1131 int i;
1132
1133 if (unlikely(!guc_submission_initialized(guc))) {
1134 /* Reset called during driver load? GuC not yet initialised! */
1135 return;
1136 }
1137
1138 intel_gt_park_heartbeats(guc_to_gt(guc));
1139 disable_submission(guc);
1140 guc->interrupts.disable(guc);
1141
1142 /* Flush IRQ handler */
1143 spin_lock_irq(&guc_to_gt(guc)->irq_lock);
1144 spin_unlock_irq(&guc_to_gt(guc)->irq_lock);
1145
1146 guc_flush_submissions(guc);
1147 guc_flush_destroyed_contexts(guc);
1148
1149 /*
1150 * Handle any outstanding G2Hs before reset. Call IRQ handler directly
	 * each pass as interrupts have been disabled. We always scrub for
1152 * outstanding G2H as it is possible for outstanding_submission_g2h to
1153 * be incremented after the context state update.
1154 */
1155 for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) {
1156 intel_guc_to_host_event_handler(guc);
1157 #define wait_for_reset(guc, wait_var) \
1158 intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20))
1159 do {
1160 wait_for_reset(guc, &guc->outstanding_submission_g2h);
1161 } while (!list_empty(&guc->ct.requests.incoming));
1162 }
1163
1164 scrub_guc_desc_for_outstanding_g2h(guc);
1165 }
1166
1167 static struct intel_engine_cs *
guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
1169 {
1170 struct intel_engine_cs *engine;
1171 intel_engine_mask_t tmp, mask = ve->mask;
1172 unsigned int num_siblings = 0;
1173
1174 for_each_engine_masked(engine, ve->gt, mask, tmp)
1175 if (num_siblings++ == sibling)
1176 return engine;
1177
1178 return NULL;
1179 }
1180
1181 static inline struct intel_engine_cs *
__context_to_physical_engine(struct intel_context *ce)
1183 {
1184 struct intel_engine_cs *engine = ce->engine;
1185
1186 if (intel_engine_is_virtual(engine))
1187 engine = guc_virtual_get_sibling(engine, 0);
1188
1189 return engine;
1190 }
1191
static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
1193 {
1194 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
1195
1196 if (intel_context_is_banned(ce))
1197 return;
1198
1199 GEM_BUG_ON(!intel_context_is_pinned(ce));
1200
1201 /*
1202 * We want a simple context + ring to execute the breadcrumb update.
1203 * We cannot rely on the context being intact across the GPU hang,
1204 * so clear it and rebuild just what we need for the breadcrumb.
1205 * All pending requests for this context will be zapped, and any
1206 * future request will be after userspace has had the opportunity
1207 * to recreate its own state.
1208 */
1209 if (scrub)
1210 lrc_init_regs(ce, engine, true);
1211
1212 /* Rerun the request; its payload has been neutered (if guilty). */
1213 lrc_update_regs(ce, engine, head);
1214 }
1215
static void guc_reset_nop(struct intel_engine_cs *engine)
1217 {
1218 }
1219
static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
1221 {
1222 }
1223
1224 static void
__unwind_incomplete_requests(struct intel_context *ce)
1226 {
1227 struct i915_request *rq, *rn;
1228 struct list_head *pl;
1229 int prio = I915_PRIORITY_INVALID;
1230 struct i915_sched_engine * const sched_engine =
1231 ce->engine->sched_engine;
1232 unsigned long flags;
1233
1234 spin_lock_irqsave(&sched_engine->lock, flags);
1235 spin_lock(&ce->guc_state.lock);
1236 list_for_each_entry_safe_reverse(rq, rn,
1237 &ce->guc_state.requests,
1238 sched.link) {
1239 if (i915_request_completed(rq))
1240 continue;
1241
1242 list_del_init(&rq->sched.link);
1243 __i915_request_unsubmit(rq);
1244
1245 /* Push the request back into the queue for later resubmission. */
1246 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1247 if (rq_prio(rq) != prio) {
1248 prio = rq_prio(rq);
1249 pl = i915_sched_lookup_priolist(sched_engine, prio);
1250 }
1251 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
1252
1253 list_add(&rq->sched.link, pl);
1254 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1255 }
1256 spin_unlock(&ce->guc_state.lock);
1257 spin_unlock_irqrestore(&sched_engine->lock, flags);
1258 }
1259
static void __guc_reset_context(struct intel_context *ce, bool stalled)
1261 {
1262 bool local_stalled;
1263 struct i915_request *rq;
1264 unsigned long flags;
1265 u32 head;
1266 int i, number_children = ce->parallel.number_children;
1267 bool skip = false;
1268 struct intel_context *parent = ce;
1269
1270 GEM_BUG_ON(intel_context_is_child(ce));
1271
1272 intel_context_get(ce);
1273
1274 /*
1275 * GuC will implicitly mark the context as non-schedulable when it sends
1276 * the reset notification. Make sure our state reflects this change. The
1277 * context will be marked enabled on resubmission.
1278 *
1279 * XXX: If the context is reset as a result of the request cancellation
1280 * this G2H is received after the schedule disable complete G2H which is
1281 * wrong as this creates a race between the request cancellation code
1282 * re-submitting the context and this G2H handler. This is a bug in the
	 * GuC but can be worked around in the meantime by converting this to a
	 * NOP if a pending enable is in flight, as this indicates that a request
1285 * cancellation has occurred.
1286 */
1287 spin_lock_irqsave(&ce->guc_state.lock, flags);
1288 if (likely(!context_pending_enable(ce)))
1289 clr_context_enabled(ce);
1290 else
1291 skip = true;
1292 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1293 if (unlikely(skip))
1294 goto out_put;
1295
1296 /*
	 * For each context in the relationship, find the hanging request and
	 * reset each context / request as needed.
1299 */
1300 for (i = 0; i < number_children + 1; ++i) {
1301 if (!intel_context_is_pinned(ce))
1302 goto next_context;
1303
1304 local_stalled = false;
1305 rq = intel_context_find_active_request(ce);
1306 if (!rq) {
1307 head = ce->ring->tail;
1308 goto out_replay;
1309 }
1310
1311 if (i915_request_started(rq))
1312 local_stalled = true;
1313
1314 GEM_BUG_ON(i915_active_is_idle(&ce->active));
1315 head = intel_ring_wrap(ce->ring, rq->head);
1316
1317 __i915_request_reset(rq, local_stalled && stalled);
1318 out_replay:
1319 guc_reset_state(ce, head, local_stalled && stalled);
1320 next_context:
1321 if (i != number_children)
1322 ce = list_next_entry(ce, parallel.child_link);
1323 }
1324
1325 __unwind_incomplete_requests(parent);
1326 out_put:
1327 intel_context_put(parent);
1328 }
1329
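/**
 * intel_guc_submission_reset - reset all contexts after a GT reset
 * @guc: the GuC
 * @stalled: true if the engines were stalled, i.e. the active request on each
 *	     context should be treated as guilty
 *
 * Walks the context lookup table, resetting the state of every pinned,
 * non-child context and unwinding its incomplete requests, then drops all
 * lookup references as the GuC itself has been reset.
 */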
void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
1331 {
1332 struct intel_context *ce;
1333 unsigned long index;
1334 unsigned long flags;
1335
1336 if (unlikely(!guc_submission_initialized(guc))) {
1337 /* Reset called during driver load? GuC not yet initialised! */
1338 return;
1339 }
1340
1341 xa_lock_irqsave(&guc->context_lookup, flags);
1342 xa_for_each(&guc->context_lookup, index, ce) {
1343 if (!kref_get_unless_zero(&ce->ref))
1344 continue;
1345
1346 xa_unlock(&guc->context_lookup);
1347
1348 if (intel_context_is_pinned(ce) &&
1349 !intel_context_is_child(ce))
1350 __guc_reset_context(ce, stalled);
1351
1352 intel_context_put(ce);
1353
1354 xa_lock(&guc->context_lookup);
1355 }
1356 xa_unlock_irqrestore(&guc->context_lookup, flags);
1357
1358 /* GuC is blown away, drop all references to contexts */
1359 xa_destroy(&guc->context_lookup);
1360 }
1361
static void guc_cancel_context_requests(struct intel_context *ce)
1363 {
1364 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
1365 struct i915_request *rq;
1366 unsigned long flags;
1367
1368 /* Mark all executing requests as skipped. */
1369 spin_lock_irqsave(&sched_engine->lock, flags);
1370 spin_lock(&ce->guc_state.lock);
1371 list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
1372 i915_request_put(i915_request_mark_eio(rq));
1373 spin_unlock(&ce->guc_state.lock);
1374 spin_unlock_irqrestore(&sched_engine->lock, flags);
1375 }
1376
1377 static void
guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
1379 {
1380 struct i915_request *rq, *rn;
1381 struct rb_node *rb;
1382 unsigned long flags;
1383
1384 /* Can be called during boot if GuC fails to load */
1385 if (!sched_engine)
1386 return;
1387
1388 /*
1389 * Before we call engine->cancel_requests(), we should have exclusive
1390 * access to the submission state. This is arranged for us by the
1391 * caller disabling the interrupt generation, the tasklet and other
1392 * threads that may then access the same state, giving us a free hand
1393 * to reset state. However, we still need to let lockdep be aware that
1394 * we know this state may be accessed in hardirq context, so we
1395 * disable the irq around this manipulation and we want to keep
1396 * the spinlock focused on its duties and not accidentally conflate
1397 * coverage to the submission's irq state. (Similarly, although we
1398 * shouldn't need to disable irq around the manipulation of the
1399 * submission's irq state, we also wish to remind ourselves that
1400 * it is irq state.)
1401 */
1402 spin_lock_irqsave(&sched_engine->lock, flags);
1403
1404 /* Flush the queued requests to the timeline list (for retiring). */
1405 while ((rb = rb_first_cached(&sched_engine->queue))) {
1406 struct i915_priolist *p = to_priolist(rb);
1407
1408 priolist_for_each_request_consume(rq, rn, p) {
1409 list_del_init(&rq->sched.link);
1410
1411 __i915_request_submit(rq);
1412
1413 i915_request_put(i915_request_mark_eio(rq));
1414 }
1415
1416 rb_erase_cached(&p->node, &sched_engine->queue);
1417 i915_priolist_free(p);
1418 }
1419
1420 /* Remaining _unready_ requests will be nop'ed when submitted */
1421
1422 sched_engine->queue_priority_hint = INT_MIN;
1423 sched_engine->queue = RB_ROOT_CACHED;
1424
1425 spin_unlock_irqrestore(&sched_engine->lock, flags);
1426 }
1427
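/**
 * intel_guc_submission_cancel_requests - cancel all outstanding requests
 * @guc: the GuC
 *
 * Marks the requests of every pinned, non-child context with -EIO, flushes
 * the queued requests from the scheduling engine and drops all context
 * references held in the lookup table.
 */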
void intel_guc_submission_cancel_requests(struct intel_guc *guc)
1429 {
1430 struct intel_context *ce;
1431 unsigned long index;
1432 unsigned long flags;
1433
1434 xa_lock_irqsave(&guc->context_lookup, flags);
1435 xa_for_each(&guc->context_lookup, index, ce) {
1436 if (!kref_get_unless_zero(&ce->ref))
1437 continue;
1438
1439 xa_unlock(&guc->context_lookup);
1440
1441 if (intel_context_is_pinned(ce) &&
1442 !intel_context_is_child(ce))
1443 guc_cancel_context_requests(ce);
1444
1445 intel_context_put(ce);
1446
1447 xa_lock(&guc->context_lookup);
1448 }
1449 xa_unlock_irqrestore(&guc->context_lookup, flags);
1450
1451 guc_cancel_sched_engine_requests(guc->sched_engine);
1452
1453 /* GuC is blown away, drop all references to contexts */
1454 xa_destroy(&guc->context_lookup);
1455 }
1456
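/**
 * intel_guc_submission_reset_finish - re-enable submission after a GT reset
 * @guc: the GuC
 *
 * Resets the outstanding G2H accounting (warning if it was non-zero),
 * refreshes the GuC global scheduling policies and re-enables the submission
 * tasklet and heartbeats. A no-op during driver load or when wedged.
 */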
void intel_guc_submission_reset_finish(struct intel_guc *guc)
1458 {
1459 /* Reset called during driver load or during wedge? */
1460 if (unlikely(!guc_submission_initialized(guc) ||
1461 test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) {
1462 return;
1463 }
1464
1465 /*
1466 * Technically possible for either of these values to be non-zero here,
1467 * but very unlikely + harmless. Regardless let's add a warn so we can
1468 * see in CI if this happens frequently / a precursor to taking down the
1469 * machine.
1470 */
1471 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
1472 atomic_set(&guc->outstanding_submission_g2h, 0);
1473
1474 intel_guc_global_policies_update(guc);
1475 enable_submission(guc);
1476 intel_gt_unpark_heartbeats(guc_to_gt(guc));
1477 }
1478
1479 static void destroyed_worker_func(struct work_struct *w);
1480
1481 /*
1482 * Set up the memory resources to be shared with the GuC (via the GGTT)
1483 * at firmware loading time.
1484 */
int intel_guc_submission_init(struct intel_guc *guc)
1486 {
1487 int ret;
1488
1489 if (guc->lrc_desc_pool)
1490 return 0;
1491
1492 ret = guc_lrc_desc_pool_create(guc);
1493 if (ret)
1494 return ret;
1495 /*
1496 * Keep static analysers happy, let them know that we allocated the
1497 * vma after testing that it didn't exist earlier.
1498 */
1499 GEM_BUG_ON(!guc->lrc_desc_pool);
1500
1501 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
1502
1503 spin_lock_init(&guc->submission_state.lock);
1504 INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
1505 ida_init(&guc->submission_state.guc_ids);
1506 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
1507 INIT_WORK(&guc->submission_state.destroyed_worker,
1508 destroyed_worker_func);
1509
1510 guc->submission_state.guc_ids_bitmap =
1511 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL);
1512 if (!guc->submission_state.guc_ids_bitmap)
1513 return -ENOMEM;
1514
1515 return 0;
1516 }
1517
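/**
 * intel_guc_submission_fini - tear down GuC submission resources
 * @guc: the GuC
 *
 * Releases any contexts still pending destruction, the LRC descriptor pool,
 * the scheduling engine reference and the multi-lrc guc_id bitmap. A no-op
 * if intel_guc_submission_init() was never run.
 */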
void intel_guc_submission_fini(struct intel_guc *guc)
1519 {
1520 if (!guc->lrc_desc_pool)
1521 return;
1522
1523 guc_flush_destroyed_contexts(guc);
1524 guc_lrc_desc_pool_destroy(guc);
1525 i915_sched_engine_put(guc->sched_engine);
1526 bitmap_free(guc->submission_state.guc_ids_bitmap);
1527 }
1528
static inline void queue_request(struct i915_sched_engine *sched_engine,
1530 struct i915_request *rq,
1531 int prio)
1532 {
1533 GEM_BUG_ON(!list_empty(&rq->sched.link));
1534 list_add_tail(&rq->sched.link,
1535 i915_sched_lookup_priolist(sched_engine, prio));
1536 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1537 tasklet_hi_schedule(&sched_engine->tasklet);
1538 }
1539
static int guc_bypass_tasklet_submit(struct intel_guc *guc,
1541 struct i915_request *rq)
1542 {
1543 int ret = 0;
1544
1545 __i915_request_submit(rq);
1546
1547 trace_i915_request_in(rq, 0);
1548
1549 if (is_multi_lrc_rq(rq)) {
1550 if (multi_lrc_submit(rq)) {
1551 ret = guc_wq_item_append(guc, rq);
1552 if (!ret)
1553 ret = guc_add_request(guc, rq);
1554 }
1555 } else {
1556 guc_set_lrc_tail(rq);
1557 ret = guc_add_request(guc, rq);
1558 }
1559
1560 if (unlikely(ret == -EPIPE))
1561 disable_submission(guc);
1562
1563 return ret;
1564 }
1565
static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
1567 {
1568 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1569 struct intel_context *ce = request_to_scheduling_context(rq);
1570
1571 return submission_disabled(guc) || guc->stalled_request ||
1572 !i915_sched_engine_is_empty(sched_engine) ||
1573 !lrc_desc_registered(guc, ce->guc_id.id);
1574 }
1575
static void guc_submit_request(struct i915_request *rq)
1577 {
1578 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1579 struct intel_guc *guc = &rq->engine->gt->uc.guc;
1580 unsigned long flags;
1581
1582 /* Will be called from irq-context when using foreign fences. */
1583 spin_lock_irqsave(&sched_engine->lock, flags);
1584
1585 if (need_tasklet(guc, rq))
1586 queue_request(sched_engine, rq, rq_prio(rq));
1587 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
1588 tasklet_hi_schedule(&sched_engine->tasklet);
1589
1590 spin_unlock_irqrestore(&sched_engine->lock, flags);
1591 }
1592
static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
1594 {
1595 int ret;
1596
1597 GEM_BUG_ON(intel_context_is_child(ce));
1598
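	/*
	 * Parents need a contiguous block of guc_ids (parent + children),
	 * carved out of the multi-lrc bitmap region; single-lrc contexts
	 * allocate a single id from the ida covering the remaining space.
	 */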
1599 if (intel_context_is_parent(ce))
1600 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
1601 NUMBER_MULTI_LRC_GUC_ID,
1602 order_base_2(ce->parallel.number_children
1603 + 1));
1604 else
1605 ret = ida_simple_get(&guc->submission_state.guc_ids,
1606 NUMBER_MULTI_LRC_GUC_ID,
1607 GUC_MAX_LRC_DESCRIPTORS,
1608 GFP_KERNEL | __GFP_RETRY_MAYFAIL |
1609 __GFP_NOWARN);
1610 if (unlikely(ret < 0))
1611 return ret;
1612
1613 ce->guc_id.id = ret;
1614 return 0;
1615 }
1616
static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
1618 {
1619 GEM_BUG_ON(intel_context_is_child(ce));
1620
1621 if (!context_guc_id_invalid(ce)) {
1622 if (intel_context_is_parent(ce))
1623 bitmap_release_region(guc->submission_state.guc_ids_bitmap,
1624 ce->guc_id.id,
1625 order_base_2(ce->parallel.number_children
1626 + 1));
1627 else
1628 ida_simple_remove(&guc->submission_state.guc_ids,
1629 ce->guc_id.id);
1630 reset_lrc_desc(guc, ce->guc_id.id);
1631 set_context_guc_id_invalid(ce);
1632 }
1633 if (!list_empty(&ce->guc_id.link))
1634 list_del_init(&ce->guc_id.link);
1635 }
1636
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
1638 {
1639 unsigned long flags;
1640
1641 spin_lock_irqsave(&guc->submission_state.lock, flags);
1642 __release_guc_id(guc, ce);
1643 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
1644 }
1645
static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
1647 {
1648 struct intel_context *cn;
1649
1650 lockdep_assert_held(&guc->submission_state.lock);
1651 GEM_BUG_ON(intel_context_is_child(ce));
1652 GEM_BUG_ON(intel_context_is_parent(ce));
1653
1654 if (!list_empty(&guc->submission_state.guc_id_list)) {
1655 cn = list_first_entry(&guc->submission_state.guc_id_list,
1656 struct intel_context,
1657 guc_id.link);
1658
1659 GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
1660 GEM_BUG_ON(context_guc_id_invalid(cn));
1661 GEM_BUG_ON(intel_context_is_child(cn));
1662 GEM_BUG_ON(intel_context_is_parent(cn));
1663
1664 list_del_init(&cn->guc_id.link);
1665 ce->guc_id.id = cn->guc_id.id;
1666
1667 spin_lock(&cn->guc_state.lock);
1668 clr_context_registered(cn);
1669 spin_unlock(&cn->guc_state.lock);
1670
1671 set_context_guc_id_invalid(cn);
1672
1673 return 0;
1674 } else {
1675 return -EAGAIN;
1676 }
1677 }
1678
1679 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
1680 {
1681 int ret;
1682
1683 lockdep_assert_held(&guc->submission_state.lock);
1684 GEM_BUG_ON(intel_context_is_child(ce));
1685
1686 ret = new_guc_id(guc, ce);
1687 if (unlikely(ret < 0)) {
1688 if (intel_context_is_parent(ce))
1689 return -ENOSPC;
1690
1691 ret = steal_guc_id(guc, ce);
1692 if (ret < 0)
1693 return ret;
1694 }
1695
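/*
 * A parent's children are handed the guc_ids immediately following the
 * parent's, which is why the parent allocation in new_guc_id() reserves a
 * contiguous order_base_2(number_children + 1) block from the bitmap.
 */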
1696 if (intel_context_is_parent(ce)) {
1697 struct intel_context *child;
1698 int i = 1;
1699
1700 for_each_child(ce, child)
1701 child->guc_id.id = ce->guc_id.id + i++;
1702 }
1703
1704 return 0;
1705 }
1706
1707 #define PIN_GUC_ID_TRIES 4
1708 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
1709 {
1710 int ret = 0;
1711 unsigned long flags, tries = PIN_GUC_ID_TRIES;
1712
1713 GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
1714
1715 try_again:
1716 spin_lock_irqsave(&guc->submission_state.lock, flags);
1717
1718 might_lock(&ce->guc_state.lock);
1719
1720 if (context_guc_id_invalid(ce)) {
1721 ret = assign_guc_id(guc, ce);
1722 if (ret)
1723 goto out_unlock;
1724 ret = 1; /* Indicates newly assigned guc_id */
1725 }
1726 if (!list_empty(&ce->guc_id.link))
1727 list_del_init(&ce->guc_id.link);
1728 atomic_inc(&ce->guc_id.ref);
1729
1730 out_unlock:
1731 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
1732
1733 /*
1734 * -EAGAIN indicates no guc_ids are available, let's retire any
1735 * outstanding requests to see if that frees up a guc_id. If the first
1736 * retire didn't help, insert a sleep with the timeslice duration before
1737 * attempting to retire more requests. Double the sleep period each
1738 * subsequent pass before finally giving up. The sleep period has a
1739 * maximum of 100ms and a minimum of 1ms.
1740 */
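/*
 * For example, with a (purely illustrative) 5 ms timeslice the first retry
 * only retires requests, while the following two retries sleep roughly 5 ms
 * and 10 ms before giving up, always clamped to the [1, 100] ms window by
 * the min_t/max_t pair below.
 */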
1741 if (ret == -EAGAIN && --tries) {
1742 if (PIN_GUC_ID_TRIES - tries > 1) {
1743 unsigned int timeslice_shifted =
1744 ce->engine->props.timeslice_duration_ms <<
1745 (PIN_GUC_ID_TRIES - tries - 2);
1746 unsigned int max = min_t(unsigned int, 100,
1747 timeslice_shifted);
1748
1749 msleep(max_t(unsigned int, max, 1));
1750 }
1751 intel_gt_retire_requests(guc_to_gt(guc));
1752 goto try_again;
1753 }
1754
1755 return ret;
1756 }
1757
1758 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
1759 {
1760 unsigned long flags;
1761
1762 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
1763 GEM_BUG_ON(intel_context_is_child(ce));
1764
1765 if (unlikely(context_guc_id_invalid(ce) ||
1766 intel_context_is_parent(ce)))
1767 return;
1768
1769 spin_lock_irqsave(&guc->submission_state.lock, flags);
1770 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
1771 !atomic_read(&ce->guc_id.ref))
1772 list_add_tail(&ce->guc_id.link,
1773 &guc->submission_state.guc_id_list);
1774 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
1775 }
1776
1777 static int __guc_action_register_multi_lrc(struct intel_guc *guc,
1778 struct intel_context *ce,
1779 u32 guc_id,
1780 u32 offset,
1781 bool loop)
1782 {
1783 struct intel_context *child;
1784 u32 action[4 + MAX_ENGINE_INSTANCE];
1785 int len = 0;
1786
1787 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
1788
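/*
 * H2G payload layout: action, guc_id, number of contexts (parent +
 * children), the parent's LRC descriptor offset, then one descriptor offset
 * per child. The children's descriptors sit contiguously after the parent's
 * in the descriptor pool, hence the fixed sizeof(struct guc_lrc_desc)
 * stride below.
 */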
1789 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
1790 action[len++] = guc_id;
1791 action[len++] = ce->parallel.number_children + 1;
1792 action[len++] = offset;
1793 for_each_child(ce, child) {
1794 offset += sizeof(struct guc_lrc_desc);
1795 action[len++] = offset;
1796 }
1797
1798 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
1799 }
1800
1801 static int __guc_action_register_context(struct intel_guc *guc,
1802 u32 guc_id,
1803 u32 offset,
1804 bool loop)
1805 {
1806 u32 action[] = {
1807 INTEL_GUC_ACTION_REGISTER_CONTEXT,
1808 guc_id,
1809 offset,
1810 };
1811
1812 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
1813 0, loop);
1814 }
1815
1816 static int register_context(struct intel_context *ce, bool loop)
1817 {
1818 struct intel_guc *guc = ce_to_guc(ce);
1819 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
1820 ce->guc_id.id * sizeof(struct guc_lrc_desc);
1821 int ret;
1822
1823 GEM_BUG_ON(intel_context_is_child(ce));
1824 trace_intel_context_register(ce);
1825
1826 if (intel_context_is_parent(ce))
1827 ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
1828 offset, loop);
1829 else
1830 ret = __guc_action_register_context(guc, ce->guc_id.id, offset,
1831 loop);
1832 if (likely(!ret)) {
1833 unsigned long flags;
1834
1835 spin_lock_irqsave(&ce->guc_state.lock, flags);
1836 set_context_registered(ce);
1837 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1838 }
1839
1840 return ret;
1841 }
1842
1843 static int __guc_action_deregister_context(struct intel_guc *guc,
1844 u32 guc_id)
1845 {
1846 u32 action[] = {
1847 INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
1848 guc_id,
1849 };
1850
1851 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
1852 G2H_LEN_DW_DEREGISTER_CONTEXT,
1853 true);
1854 }
1855
1856 static int deregister_context(struct intel_context *ce, u32 guc_id)
1857 {
1858 struct intel_guc *guc = ce_to_guc(ce);
1859
1860 GEM_BUG_ON(intel_context_is_child(ce));
1861 trace_intel_context_deregister(ce);
1862
1863 return __guc_action_deregister_context(guc, guc_id);
1864 }
1865
1866 static inline void clear_children_join_go_memory(struct intel_context *ce)
1867 {
1868 struct parent_scratch *ps = __get_parent_scratch(ce);
1869 int i;
1870
1871 ps->go.semaphore = 0;
1872 for (i = 0; i < ce->parallel.number_children + 1; ++i)
1873 ps->join[i].semaphore = 0;
1874 }
1875
1876 static inline u32 get_children_go_value(struct intel_context *ce)
1877 {
1878 return __get_parent_scratch(ce)->go.semaphore;
1879 }
1880
1881 static inline u32 get_children_join_value(struct intel_context *ce,
1882 u8 child_index)
1883 {
1884 return __get_parent_scratch(ce)->join[child_index].semaphore;
1885 }
1886
1887 static void guc_context_policy_init(struct intel_engine_cs *engine,
1888 struct guc_lrc_desc *desc)
1889 {
1890 desc->policy_flags = 0;
1891
1892 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
1893 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE;
1894
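/*
 * The GuC consumes both values in microseconds, hence the ms -> us
 * conversion from the engine properties below.
 */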
1895 /* NB: For both of these, zero means disabled. */
1896 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
1897 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
1898 }
1899
1900 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
1901 {
1902 struct intel_engine_cs *engine = ce->engine;
1903 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
1904 struct intel_guc *guc = &engine->gt->uc.guc;
1905 u32 desc_idx = ce->guc_id.id;
1906 struct guc_lrc_desc *desc;
1907 bool context_registered;
1908 intel_wakeref_t wakeref;
1909 struct intel_context *child;
1910 int ret = 0;
1911
1912 GEM_BUG_ON(!engine->mask);
1913 GEM_BUG_ON(!sched_state_is_init(ce));
1914
1915 /*
1916 * Ensure the LRC + CT vmas are in the same region, as the write
1917 * barrier is done based on the CT vma region.
1918 */
1919 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
1920 i915_gem_object_is_lmem(ce->ring->vma->obj));
1921
1922 context_registered = lrc_desc_registered(guc, desc_idx);
1923
1924 reset_lrc_desc(guc, desc_idx);
1925 set_lrc_desc_registered(guc, desc_idx, ce);
1926
1927 desc = __get_lrc_desc(guc, desc_idx);
1928 desc->engine_class = engine_class_to_guc_class(engine->class);
1929 desc->engine_submit_mask = engine->logical_mask;
1930 desc->hw_context_desc = ce->lrc.lrca;
1931 desc->priority = ce->guc_state.prio;
1932 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
1933 guc_context_policy_init(engine, desc);
1934
1935 /*
1936 * If context is a parent, we need to register a process descriptor
1937 * describing a work queue and register all child contexts.
1938 */
1939 if (intel_context_is_parent(ce)) {
1940 struct guc_process_desc *pdesc;
1941
1942 ce->parallel.guc.wqi_tail = 0;
1943 ce->parallel.guc.wqi_head = 0;
1944
1945 desc->process_desc = i915_ggtt_offset(ce->state) +
1946 __get_parent_scratch_offset(ce);
1947 desc->wq_addr = i915_ggtt_offset(ce->state) +
1948 __get_wq_offset(ce);
1949 desc->wq_size = WQ_SIZE;
1950
1951 pdesc = __get_process_desc(ce);
1952 memset(pdesc, 0, sizeof(*(pdesc)));
1953 pdesc->stage_id = ce->guc_id.id;
1954 pdesc->wq_base_addr = desc->wq_addr;
1955 pdesc->wq_size_bytes = desc->wq_size;
1956 pdesc->wq_status = WQ_STATUS_ACTIVE;
1957
1958 for_each_child(ce, child) {
1959 desc = __get_lrc_desc(guc, child->guc_id.id);
1960
1961 desc->engine_class =
1962 engine_class_to_guc_class(engine->class);
1963 desc->hw_context_desc = child->lrc.lrca;
1964 desc->priority = ce->guc_state.prio;
1965 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
1966 guc_context_policy_init(engine, desc);
1967 }
1968
1969 clear_children_join_go_memory(ce);
1970 }
1971
1972 /*
1973 * The context_lookup xarray is used to determine if the hardware
1974 * context is currently registered. There are two cases in which it
1975 * could be registered: either the guc_id has been stolen from another
1976 * context or the lrc descriptor address of this context has changed. In
1977 * either case the context needs to be deregistered with the GuC before
1978 * registering this context.
1979 */
1980 if (context_registered) {
1981 bool disabled;
1982 unsigned long flags;
1983
1984 trace_intel_context_steal_guc_id(ce);
1985 GEM_BUG_ON(!loop);
1986
1987 /* Seal race with Reset */
1988 spin_lock_irqsave(&ce->guc_state.lock, flags);
1989 disabled = submission_disabled(guc);
1990 if (likely(!disabled)) {
1991 set_context_wait_for_deregister_to_register(ce);
1992 intel_context_get(ce);
1993 }
1994 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1995 if (unlikely(disabled)) {
1996 reset_lrc_desc(guc, desc_idx);
1997 return 0; /* Will get registered later */
1998 }
1999
2000 /*
2001 * If stealing the guc_id, this ce has the same guc_id as the
2002 * context whose guc_id was stolen.
2003 */
2004 with_intel_runtime_pm(runtime_pm, wakeref)
2005 ret = deregister_context(ce, ce->guc_id.id);
2006 if (unlikely(ret == -ENODEV))
2007 ret = 0; /* Will get registered later */
2008 } else {
2009 with_intel_runtime_pm(runtime_pm, wakeref)
2010 ret = register_context(ce, loop);
2011 if (unlikely(ret == -EBUSY)) {
2012 reset_lrc_desc(guc, desc_idx);
2013 } else if (unlikely(ret == -ENODEV)) {
2014 reset_lrc_desc(guc, desc_idx);
2015 ret = 0; /* Will get registered later */
2016 }
2017 }
2018
2019 return ret;
2020 }
2021
2022 static int __guc_context_pre_pin(struct intel_context *ce,
2023 struct intel_engine_cs *engine,
2024 struct i915_gem_ww_ctx *ww,
2025 void **vaddr)
2026 {
2027 return lrc_pre_pin(ce, engine, ww, vaddr);
2028 }
2029
2030 static int __guc_context_pin(struct intel_context *ce,
2031 struct intel_engine_cs *engine,
2032 void *vaddr)
2033 {
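/*
 * If the LRC backing store has moved since the context was last registered,
 * mark the LRCA dirty so that guc_request_alloc() re-registers the context
 * with the new GGTT address before the next submission.
 */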
2034 if (i915_ggtt_offset(ce->state) !=
2035 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2036 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2037
2038 /*
2039 * GuC context gets pinned in guc_request_alloc. See that function for
2040 * explanation of why.
2041 */
2042
2043 return lrc_pin(ce, engine, vaddr);
2044 }
2045
2046 static int guc_context_pre_pin(struct intel_context *ce,
2047 struct i915_gem_ww_ctx *ww,
2048 void **vaddr)
2049 {
2050 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2051 }
2052
2053 static int guc_context_pin(struct intel_context *ce, void *vaddr)
2054 {
2055 int ret = __guc_context_pin(ce, ce->engine, vaddr);
2056
2057 if (likely(!ret && !intel_context_is_barrier(ce)))
2058 intel_engine_pm_get(ce->engine);
2059
2060 return ret;
2061 }
2062
2063 static void guc_context_unpin(struct intel_context *ce)
2064 {
2065 struct intel_guc *guc = ce_to_guc(ce);
2066
2067 unpin_guc_id(guc, ce);
2068 lrc_unpin(ce);
2069
2070 if (likely(!intel_context_is_barrier(ce)))
2071 intel_engine_pm_put_async(ce->engine);
2072 }
2073
2074 static void guc_context_post_unpin(struct intel_context *ce)
2075 {
2076 lrc_post_unpin(ce);
2077 }
2078
2079 static void __guc_context_sched_enable(struct intel_guc *guc,
2080 struct intel_context *ce)
2081 {
2082 u32 action[] = {
2083 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2084 ce->guc_id.id,
2085 GUC_CONTEXT_ENABLE
2086 };
2087
2088 trace_intel_context_sched_enable(ce);
2089
2090 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2091 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2092 }
2093
2094 static void __guc_context_sched_disable(struct intel_guc *guc,
2095 struct intel_context *ce,
2096 u16 guc_id)
2097 {
2098 u32 action[] = {
2099 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2100 guc_id, /* ce->guc_id.id not stable */
2101 GUC_CONTEXT_DISABLE
2102 };
2103
2104 GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
2105
2106 GEM_BUG_ON(intel_context_is_child(ce));
2107 trace_intel_context_sched_disable(ce);
2108
2109 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2110 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2111 }
2112
2113 static void guc_blocked_fence_complete(struct intel_context *ce)
2114 {
2115 lockdep_assert_held(&ce->guc_state.lock);
2116
2117 if (!i915_sw_fence_done(&ce->guc_state.blocked))
2118 i915_sw_fence_complete(&ce->guc_state.blocked);
2119 }
2120
2121 static void guc_blocked_fence_reinit(struct intel_context *ce)
2122 {
2123 lockdep_assert_held(&ce->guc_state.lock);
2124 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2125
2126 /*
2127 * This fence is always complete unless a pending schedule disable is
2128 * outstanding. We arm the fence here and complete it when we receive
2129 * the pending schedule disable complete message.
2130 */
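/*
 * The fini/reinit pair below resets the fence, and the await taken before
 * commit leaves one wait outstanding, so the fence stays pending until
 * guc_blocked_fence_complete() drops that wait.
 */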
2131 i915_sw_fence_fini(&ce->guc_state.blocked);
2132 i915_sw_fence_reinit(&ce->guc_state.blocked);
2133 i915_sw_fence_await(&ce->guc_state.blocked);
2134 i915_sw_fence_commit(&ce->guc_state.blocked);
2135 }
2136
2137 static u16 prep_context_pending_disable(struct intel_context *ce)
2138 {
2139 lockdep_assert_held(&ce->guc_state.lock);
2140
2141 set_context_pending_disable(ce);
2142 clr_context_enabled(ce);
2143 guc_blocked_fence_reinit(ce);
2144 intel_context_get(ce);
2145
2146 return ce->guc_id.id;
2147 }
2148
2149 static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
2150 {
2151 struct intel_guc *guc = ce_to_guc(ce);
2152 unsigned long flags;
2153 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2154 intel_wakeref_t wakeref;
2155 u16 guc_id;
2156 bool enabled;
2157
2158 GEM_BUG_ON(intel_context_is_child(ce));
2159
2160 spin_lock_irqsave(&ce->guc_state.lock, flags);
2161
2162 incr_context_blocked(ce);
2163
2164 enabled = context_enabled(ce);
2165 if (unlikely(!enabled || submission_disabled(guc))) {
2166 if (enabled)
2167 clr_context_enabled(ce);
2168 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2169 return &ce->guc_state.blocked;
2170 }
2171
2172 /*
2173 * We add +2 here as the schedule disable complete CTB handler calls
2174 * intel_context_sched_disable_unpin (-2 to pin_count).
2175 */
2176 atomic_add(2, &ce->pin_count);
2177
2178 guc_id = prep_context_pending_disable(ce);
2179
2180 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2181
2182 with_intel_runtime_pm(runtime_pm, wakeref)
2183 __guc_context_sched_disable(guc, ce, guc_id);
2184
2185 return &ce->guc_state.blocked;
2186 }
2187
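/*
 * The blocked state in ce->guc_state.sched_state is a counter, so
 * SCHED_STATE_MULTI_BLOCKED_MASK is set whenever a context has been blocked
 * more than once. A context must not be unblocked while other blocks are
 * still outstanding, while a schedule disable is in flight, or once it has
 * been banned.
 */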
2188 #define SCHED_STATE_MULTI_BLOCKED_MASK \
2189 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
2190 #define SCHED_STATE_NO_UNBLOCK \
2191 (SCHED_STATE_MULTI_BLOCKED_MASK | \
2192 SCHED_STATE_PENDING_DISABLE | \
2193 SCHED_STATE_BANNED)
2194
2195 static bool context_cant_unblock(struct intel_context *ce)
2196 {
2197 lockdep_assert_held(&ce->guc_state.lock);
2198
2199 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
2200 context_guc_id_invalid(ce) ||
2201 !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) ||
2202 !intel_context_is_pinned(ce);
2203 }
2204
2205 static void guc_context_unblock(struct intel_context *ce)
2206 {
2207 struct intel_guc *guc = ce_to_guc(ce);
2208 unsigned long flags;
2209 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2210 intel_wakeref_t wakeref;
2211 bool enable;
2212
2213 GEM_BUG_ON(context_enabled(ce));
2214 GEM_BUG_ON(intel_context_is_child(ce));
2215
2216 spin_lock_irqsave(&ce->guc_state.lock, flags);
2217
2218 if (unlikely(submission_disabled(guc) ||
2219 context_cant_unblock(ce))) {
2220 enable = false;
2221 } else {
2222 enable = true;
2223 set_context_pending_enable(ce);
2224 set_context_enabled(ce);
2225 intel_context_get(ce);
2226 }
2227
2228 decr_context_blocked(ce);
2229
2230 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2231
2232 if (enable) {
2233 with_intel_runtime_pm(runtime_pm, wakeref)
2234 __guc_context_sched_enable(guc, ce);
2235 }
2236 }
2237
2238 static void guc_context_cancel_request(struct intel_context *ce,
2239 struct i915_request *rq)
2240 {
2241 struct intel_context *block_context =
2242 request_to_scheduling_context(rq);
2243
2244 if (i915_sw_fence_signaled(&rq->submit)) {
2245 struct i915_sw_fence *fence;
2246
2247 intel_context_get(ce);
2248 fence = guc_context_block(block_context);
2249 i915_sw_fence_wait(fence);
2250 if (!i915_request_completed(rq)) {
2251 __i915_request_skip(rq);
2252 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
2253 true);
2254 }
2255
2256 /*
2257 * XXX: Racey if context is reset, see comment in
2258 * __guc_reset_context().
2259 */
2260 flush_work(&ce_to_guc(ce)->ct.requests.worker);
2261
2262 guc_context_unblock(block_context);
2263 intel_context_put(ce);
2264 }
2265 }
2266
2267 static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
2268 u16 guc_id,
2269 u32 preemption_timeout)
2270 {
2271 u32 action[] = {
2272 INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT,
2273 guc_id,
2274 preemption_timeout
2275 };
2276
2277 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
2278 }
2279
2280 static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
2281 {
2282 struct intel_guc *guc = ce_to_guc(ce);
2283 struct intel_runtime_pm *runtime_pm =
2284 &ce->engine->gt->i915->runtime_pm;
2285 intel_wakeref_t wakeref;
2286 unsigned long flags;
2287
2288 GEM_BUG_ON(intel_context_is_child(ce));
2289
2290 guc_flush_submissions(guc);
2291
2292 spin_lock_irqsave(&ce->guc_state.lock, flags);
2293 set_context_banned(ce);
2294
2295 if (submission_disabled(guc) ||
2296 (!context_enabled(ce) && !context_pending_disable(ce))) {
2297 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2298
2299 guc_cancel_context_requests(ce);
2300 intel_engine_signal_breadcrumbs(ce->engine);
2301 } else if (!context_pending_disable(ce)) {
2302 u16 guc_id;
2303
2304 /*
2305 * We add +2 here as the schedule disable complete CTB handler
2306 * calls intel_context_sched_disable_unpin (-2 to pin_count).
2307 */
2308 atomic_add(2, &ce->pin_count);
2309
2310 guc_id = prep_context_pending_disable(ce);
2311 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2312
2313 /*
2314 * In addition to disabling scheduling, set the preemption
2315 * timeout to the minimum value (1 us) so the banned context
2316 * gets kicked off the HW ASAP.
2317 */
2318 with_intel_runtime_pm(runtime_pm, wakeref) {
2319 __guc_context_set_preemption_timeout(guc, guc_id, 1);
2320 __guc_context_sched_disable(guc, ce, guc_id);
2321 }
2322 } else {
2323 if (!context_guc_id_invalid(ce))
2324 with_intel_runtime_pm(runtime_pm, wakeref)
2325 __guc_context_set_preemption_timeout(guc,
2326 ce->guc_id.id,
2327 1);
2328 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2329 }
2330 }
2331
2332 static void guc_context_sched_disable(struct intel_context *ce)
2333 {
2334 struct intel_guc *guc = ce_to_guc(ce);
2335 unsigned long flags;
2336 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
2337 intel_wakeref_t wakeref;
2338 u16 guc_id;
2339
2340 GEM_BUG_ON(intel_context_is_child(ce));
2341
2342 spin_lock_irqsave(&ce->guc_state.lock, flags);
2343
2344 /*
2345 * We have to check if the context has been disabled by another thread,
2346 * check if submission has been disabled to seal a race with reset and
2347 * finally check if any more requests have been committed to the
2348 * context, ensuring that a request doesn't slip through the
2349 * 'context_pending_disable' fence.
2350 */
2351 if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
2352 context_has_committed_requests(ce))) {
2353 clr_context_enabled(ce);
2354 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2355 goto unpin;
2356 }
2357 guc_id = prep_context_pending_disable(ce);
2358
2359 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2360
2361 with_intel_runtime_pm(runtime_pm, wakeref)
2362 __guc_context_sched_disable(guc, ce, guc_id);
2363
2364 return;
2365 unpin:
2366 intel_context_sched_disable_unpin(ce);
2367 }
2368
2369 static inline void guc_lrc_desc_unpin(struct intel_context *ce)
2370 {
2371 struct intel_guc *guc = ce_to_guc(ce);
2372 struct intel_gt *gt = guc_to_gt(guc);
2373 unsigned long flags;
2374 bool disabled;
2375
2376 lockdep_assert_held(&guc->submission_state.lock);
2377 GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
2378 GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
2379 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
2380 GEM_BUG_ON(context_enabled(ce));
2381
2382 /* Seal race with Reset */
2383 spin_lock_irqsave(&ce->guc_state.lock, flags);
2384 disabled = submission_disabled(guc);
2385 if (likely(!disabled)) {
2386 __intel_gt_pm_get(gt);
2387 set_context_destroyed(ce);
2388 clr_context_registered(ce);
2389 }
2390 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2391 if (unlikely(disabled)) {
2392 __release_guc_id(guc, ce);
2393 __guc_context_destroy(ce);
2394 return;
2395 }
2396
2397 deregister_context(ce, ce->guc_id.id);
2398 }
2399
2400 static void __guc_context_destroy(struct intel_context *ce)
2401 {
2402 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
2403 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
2404 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
2405 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
2406 GEM_BUG_ON(ce->guc_state.number_committed_requests);
2407
2408 lrc_fini(ce);
2409 intel_context_fini(ce);
2410
2411 if (intel_engine_is_virtual(ce->engine)) {
2412 struct guc_virtual_engine *ve =
2413 container_of(ce, typeof(*ve), context);
2414
2415 if (ve->base.breadcrumbs)
2416 intel_breadcrumbs_put(ve->base.breadcrumbs);
2417
2418 kfree(ve);
2419 } else {
2420 intel_context_free(ce);
2421 }
2422 }
2423
2424 static void guc_flush_destroyed_contexts(struct intel_guc *guc)
2425 {
2426 struct intel_context *ce, *cn;
2427 unsigned long flags;
2428
2429 GEM_BUG_ON(!submission_disabled(guc) &&
2430 guc_submission_initialized(guc));
2431
2432 spin_lock_irqsave(&guc->submission_state.lock, flags);
2433 list_for_each_entry_safe(ce, cn,
2434 &guc->submission_state.destroyed_contexts,
2435 destroyed_link) {
2436 list_del_init(&ce->destroyed_link);
2437 __release_guc_id(guc, ce);
2438 __guc_context_destroy(ce);
2439 }
2440 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2441 }
2442
2443 static void deregister_destroyed_contexts(struct intel_guc *guc)
2444 {
2445 struct intel_context *ce, *cn;
2446 unsigned long flags;
2447
2448 spin_lock_irqsave(&guc->submission_state.lock, flags);
2449 list_for_each_entry_safe(ce, cn,
2450 &guc->submission_state.destroyed_contexts,
2451 destroyed_link) {
2452 list_del_init(&ce->destroyed_link);
2453 guc_lrc_desc_unpin(ce);
2454 }
2455 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2456 }
2457
2458 static void destroyed_worker_func(struct work_struct *w)
2459 {
2460 struct intel_guc *guc = container_of(w, struct intel_guc,
2461 submission_state.destroyed_worker);
2462 struct intel_gt *gt = guc_to_gt(guc);
2463 int tmp;
2464
2465 with_intel_gt_pm(gt, tmp)
2466 deregister_destroyed_contexts(guc);
2467 }
2468
2469 static void guc_context_destroy(struct kref *kref)
2470 {
2471 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2472 struct intel_guc *guc = ce_to_guc(ce);
2473 unsigned long flags;
2474 bool destroy;
2475
2476 /*
2477 * If the guc_id is invalid this context has been stolen and we can free
2478 * it immediately. It can also be freed immediately if the context is not
2479 * registered with the GuC or the GuC is in the middle of a reset.
2480 */
2481 spin_lock_irqsave(&guc->submission_state.lock, flags);
2482 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
2483 !lrc_desc_registered(guc, ce->guc_id.id);
2484 if (likely(!destroy)) {
2485 if (!list_empty(&ce->guc_id.link))
2486 list_del_init(&ce->guc_id.link);
2487 list_add_tail(&ce->destroyed_link,
2488 &guc->submission_state.destroyed_contexts);
2489 } else {
2490 __release_guc_id(guc, ce);
2491 }
2492 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2493 if (unlikely(destroy)) {
2494 __guc_context_destroy(ce);
2495 return;
2496 }
2497
2498 /*
2499 * We use a worker to issue the H2G to deregister the context as we can
2500 * take the GT PM for the first time which isn't allowed from an atomic
2501 * context.
2502 */
2503 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
2504 }
2505
2506 static int guc_context_alloc(struct intel_context *ce)
2507 {
2508 return lrc_alloc(ce, ce->engine);
2509 }
2510
2511 static void guc_context_set_prio(struct intel_guc *guc,
2512 struct intel_context *ce,
2513 u8 prio)
2514 {
2515 u32 action[] = {
2516 INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY,
2517 ce->guc_id.id,
2518 prio,
2519 };
2520
2521 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
2522 prio > GUC_CLIENT_PRIORITY_NORMAL);
2523 lockdep_assert_held(&ce->guc_state.lock);
2524
2525 if (ce->guc_state.prio == prio || submission_disabled(guc) ||
2526 !context_registered(ce)) {
2527 ce->guc_state.prio = prio;
2528 return;
2529 }
2530
2531 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
2532
2533 ce->guc_state.prio = prio;
2534 trace_intel_context_set_prio(ce);
2535 }
2536
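/*
 * i915 -> GuC priority mapping: anything below NORMAL maps to the lowest GuC
 * level (NORMAL), NORMAL itself to KMD_NORMAL, priorities above NORMAL but
 * below DISPLAY to HIGH, and DISPLAY and above to KMD_HIGH. Note that,
 * unlike i915 priorities, a numerically lower GuC value means a higher
 * priority.
 */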
2537 static inline u8 map_i915_prio_to_guc_prio(int prio)
2538 {
2539 if (prio == I915_PRIORITY_NORMAL)
2540 return GUC_CLIENT_PRIORITY_KMD_NORMAL;
2541 else if (prio < I915_PRIORITY_NORMAL)
2542 return GUC_CLIENT_PRIORITY_NORMAL;
2543 else if (prio < I915_PRIORITY_DISPLAY)
2544 return GUC_CLIENT_PRIORITY_HIGH;
2545 else
2546 return GUC_CLIENT_PRIORITY_KMD_HIGH;
2547 }
2548
2549 static inline void add_context_inflight_prio(struct intel_context *ce,
2550 u8 guc_prio)
2551 {
2552 lockdep_assert_held(&ce->guc_state.lock);
2553 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
2554
2555 ++ce->guc_state.prio_count[guc_prio];
2556
2557 /* Overflow protection */
2558 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
2559 }
2560
2561 static inline void sub_context_inflight_prio(struct intel_context *ce,
2562 u8 guc_prio)
2563 {
2564 lockdep_assert_held(&ce->guc_state.lock);
2565 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
2566
2567 /* Underflow protection */
2568 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
2569
2570 --ce->guc_state.prio_count[guc_prio];
2571 }
2572
2573 static inline void update_context_prio(struct intel_context *ce)
2574 {
2575 struct intel_guc *guc = &ce->engine->gt->uc.guc;
2576 int i;
2577
2578 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
2579 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
2580
2581 lockdep_assert_held(&ce->guc_state.lock);
2582
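/*
 * prio_count[] is indexed from the highest GuC priority (KMD_HIGH == 0)
 * downwards, so the first non-empty bucket is the highest priority that
 * still has an in-flight request; that becomes the context's priority.
 */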
2583 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
2584 if (ce->guc_state.prio_count[i]) {
2585 guc_context_set_prio(guc, ce, i);
2586 break;
2587 }
2588 }
2589 }
2590
2591 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
2592 {
2593 /* Lower value is higher priority */
2594 return new_guc_prio < old_guc_prio;
2595 }
2596
2597 static void add_to_context(struct i915_request *rq)
2598 {
2599 struct intel_context *ce = request_to_scheduling_context(rq);
2600 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
2601
2602 GEM_BUG_ON(intel_context_is_child(ce));
2603 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
2604
2605 spin_lock(&ce->guc_state.lock);
2606 list_move_tail(&rq->sched.link, &ce->guc_state.requests);
2607
2608 if (rq->guc_prio == GUC_PRIO_INIT) {
2609 rq->guc_prio = new_guc_prio;
2610 add_context_inflight_prio(ce, rq->guc_prio);
2611 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
2612 sub_context_inflight_prio(ce, rq->guc_prio);
2613 rq->guc_prio = new_guc_prio;
2614 add_context_inflight_prio(ce, rq->guc_prio);
2615 }
2616 update_context_prio(ce);
2617
2618 spin_unlock(&ce->guc_state.lock);
2619 }
2620
2621 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
2622 {
2623 lockdep_assert_held(&ce->guc_state.lock);
2624
2625 if (rq->guc_prio != GUC_PRIO_INIT &&
2626 rq->guc_prio != GUC_PRIO_FINI) {
2627 sub_context_inflight_prio(ce, rq->guc_prio);
2628 update_context_prio(ce);
2629 }
2630 rq->guc_prio = GUC_PRIO_FINI;
2631 }
2632
2633 static void remove_from_context(struct i915_request *rq)
2634 {
2635 struct intel_context *ce = request_to_scheduling_context(rq);
2636
2637 GEM_BUG_ON(intel_context_is_child(ce));
2638
2639 spin_lock_irq(&ce->guc_state.lock);
2640
2641 list_del_init(&rq->sched.link);
2642 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2643
2644 /* Prevent further __await_execution() registering a cb, then flush */
2645 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
2646
2647 guc_prio_fini(rq, ce);
2648
2649 decr_context_committed_requests(ce);
2650
2651 spin_unlock_irq(&ce->guc_state.lock);
2652
2653 atomic_dec(&ce->guc_id.ref);
2654 i915_request_notify_execute_cb_imm(rq);
2655 }
2656
2657 static const struct intel_context_ops guc_context_ops = {
2658 .alloc = guc_context_alloc,
2659
2660 .pre_pin = guc_context_pre_pin,
2661 .pin = guc_context_pin,
2662 .unpin = guc_context_unpin,
2663 .post_unpin = guc_context_post_unpin,
2664
2665 .ban = guc_context_ban,
2666
2667 .cancel_request = guc_context_cancel_request,
2668
2669 .enter = intel_context_enter_engine,
2670 .exit = intel_context_exit_engine,
2671
2672 .sched_disable = guc_context_sched_disable,
2673
2674 .reset = lrc_reset,
2675 .destroy = guc_context_destroy,
2676
2677 .create_virtual = guc_create_virtual,
2678 .create_parallel = guc_create_parallel,
2679 };
2680
2681 static void submit_work_cb(struct irq_work *wrk)
2682 {
2683 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
2684
2685 might_lock(&rq->engine->sched_engine->lock);
2686 i915_sw_fence_complete(&rq->submit);
2687 }
2688
2689 static void __guc_signal_context_fence(struct intel_context *ce)
2690 {
2691 struct i915_request *rq, *rn;
2692
2693 lockdep_assert_held(&ce->guc_state.lock);
2694
2695 if (!list_empty(&ce->guc_state.fences))
2696 trace_intel_context_fence_release(ce);
2697
2698 /*
2699 * Use an IRQ to ensure locking order of sched_engine->lock ->
2700 * ce->guc_state.lock is preserved.
2701 */
2702 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
2703 guc_fence_link) {
2704 list_del(&rq->guc_fence_link);
2705 irq_work_queue(&rq->submit_work);
2706 }
2707
2708 INIT_LIST_HEAD(&ce->guc_state.fences);
2709 }
2710
2711 static void guc_signal_context_fence(struct intel_context *ce)
2712 {
2713 unsigned long flags;
2714
2715 GEM_BUG_ON(intel_context_is_child(ce));
2716
2717 spin_lock_irqsave(&ce->guc_state.lock, flags);
2718 clr_context_wait_for_deregister_to_register(ce);
2719 __guc_signal_context_fence(ce);
2720 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2721 }
2722
2723 static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
2724 {
2725 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
2726 !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) &&
2727 !submission_disabled(ce_to_guc(ce));
2728 }
2729
2730 static void guc_context_init(struct intel_context *ce)
2731 {
2732 const struct i915_gem_context *ctx;
2733 int prio = I915_CONTEXT_DEFAULT_PRIORITY;
2734
2735 rcu_read_lock();
2736 ctx = rcu_dereference(ce->gem_context);
2737 if (ctx)
2738 prio = ctx->sched.priority;
2739 rcu_read_unlock();
2740
2741 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
2742 set_bit(CONTEXT_GUC_INIT, &ce->flags);
2743 }
2744
2745 static int guc_request_alloc(struct i915_request *rq)
2746 {
2747 struct intel_context *ce = request_to_scheduling_context(rq);
2748 struct intel_guc *guc = ce_to_guc(ce);
2749 unsigned long flags;
2750 int ret;
2751
2752 GEM_BUG_ON(!intel_context_is_pinned(rq->context));
2753
2754 /*
2755 * Flush enough space to reduce the likelihood of waiting after
2756 * we start building the request - in which case we will just
2757 * have to repeat work.
2758 */
2759 rq->reserved_space += GUC_REQUEST_SIZE;
2760
2761 /*
2762 * Note that after this point, we have committed to using
2763 * this request as it is being used to both track the
2764 * state of engine initialisation and liveness of the
2765 * golden renderstate above. Think twice before you try
2766 * to cancel/unwind this request now.
2767 */
2768
2769 /* Unconditionally invalidate GPU caches and TLBs. */
2770 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
2771 if (ret)
2772 return ret;
2773
2774 rq->reserved_space -= GUC_REQUEST_SIZE;
2775
2776 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
2777 guc_context_init(ce);
2778
2779 /*
2780 * Call pin_guc_id here rather than in the pinning step as with
2781 * dma_resv, contexts can be repeatedly pinned / unpinned, thrashing the
2782 * guc_id and creating horrible race conditions. This is especially bad
2783 * when guc_id are being stolen due to over subscription. By the time
2784 * this function is reached, it is guaranteed that the guc_id will be
2785 * persistent until the generated request is retired, thus sealing these
2786 * race conditions. It is still safe to fail here if guc_ids are
2787 * exhausted and return -EAGAIN to the user indicating that they can try
2788 * again in the future.
2789 *
2790 * There is no need for a lock here as the timeline mutex ensures at
2791 * most one context can be executing this code path at once. The
2792 * guc_id_ref is incremented once for every request in flight and
2793 * decremented on each retire. When it is zero, a lock around the
2794 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
2795 */
2796 if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
2797 goto out;
2798
2799 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
2800 if (unlikely(ret < 0))
2801 return ret;
2802 if (context_needs_register(ce, !!ret)) {
2803 ret = guc_lrc_desc_pin(ce, true);
2804 if (unlikely(ret)) { /* unwind */
2805 if (ret == -EPIPE) {
2806 disable_submission(guc);
2807 goto out; /* GPU will be reset */
2808 }
2809 atomic_dec(&ce->guc_id.ref);
2810 unpin_guc_id(guc, ce);
2811 return ret;
2812 }
2813 }
2814
2815 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2816
2817 out:
2818 /*
2819 * We block all requests on this context if a G2H is pending for a
2820 * schedule disable or context deregistration as the GuC will fail a
2821 * schedule enable or context registration if either G2H is pending
2822 * respectively. Once a G2H returns, the fence that is blocking these
2823 * requests is released (see guc_signal_context_fence).
2824 */
2825 spin_lock_irqsave(&ce->guc_state.lock, flags);
2826 if (context_wait_for_deregister_to_register(ce) ||
2827 context_pending_disable(ce)) {
2828 init_irq_work(&rq->submit_work, submit_work_cb);
2829 i915_sw_fence_await(&rq->submit);
2830
2831 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
2832 }
2833 incr_context_committed_requests(ce);
2834 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2835
2836 return 0;
2837 }
2838
2839 static int guc_virtual_context_pre_pin(struct intel_context *ce,
2840 struct i915_gem_ww_ctx *ww,
2841 void **vaddr)
2842 {
2843 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2844
2845 return __guc_context_pre_pin(ce, engine, ww, vaddr);
2846 }
2847
2848 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
2849 {
2850 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2851 int ret = __guc_context_pin(ce, engine, vaddr);
2852 intel_engine_mask_t tmp, mask = ce->engine->mask;
2853
2854 if (likely(!ret))
2855 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
2856 intel_engine_pm_get(engine);
2857
2858 return ret;
2859 }
2860
2861 static void guc_virtual_context_unpin(struct intel_context *ce)
2862 {
2863 intel_engine_mask_t tmp, mask = ce->engine->mask;
2864 struct intel_engine_cs *engine;
2865 struct intel_guc *guc = ce_to_guc(ce);
2866
2867 GEM_BUG_ON(context_enabled(ce));
2868 GEM_BUG_ON(intel_context_is_barrier(ce));
2869
2870 unpin_guc_id(guc, ce);
2871 lrc_unpin(ce);
2872
2873 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
2874 intel_engine_pm_put_async(engine);
2875 }
2876
2877 static void guc_virtual_context_enter(struct intel_context *ce)
2878 {
2879 intel_engine_mask_t tmp, mask = ce->engine->mask;
2880 struct intel_engine_cs *engine;
2881
2882 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
2883 intel_engine_pm_get(engine);
2884
2885 intel_timeline_enter(ce->timeline);
2886 }
2887
2888 static void guc_virtual_context_exit(struct intel_context *ce)
2889 {
2890 intel_engine_mask_t tmp, mask = ce->engine->mask;
2891 struct intel_engine_cs *engine;
2892
2893 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
2894 intel_engine_pm_put(engine);
2895
2896 intel_timeline_exit(ce->timeline);
2897 }
2898
2899 static int guc_virtual_context_alloc(struct intel_context *ce)
2900 {
2901 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2902
2903 return lrc_alloc(ce, engine);
2904 }
2905
2906 static const struct intel_context_ops virtual_guc_context_ops = {
2907 .alloc = guc_virtual_context_alloc,
2908
2909 .pre_pin = guc_virtual_context_pre_pin,
2910 .pin = guc_virtual_context_pin,
2911 .unpin = guc_virtual_context_unpin,
2912 .post_unpin = guc_context_post_unpin,
2913
2914 .ban = guc_context_ban,
2915
2916 .cancel_request = guc_context_cancel_request,
2917
2918 .enter = guc_virtual_context_enter,
2919 .exit = guc_virtual_context_exit,
2920
2921 .sched_disable = guc_context_sched_disable,
2922
2923 .destroy = guc_context_destroy,
2924
2925 .get_sibling = guc_virtual_get_sibling,
2926 };
2927
2928 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
2929 {
2930 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2931 struct intel_guc *guc = ce_to_guc(ce);
2932 int ret;
2933
2934 GEM_BUG_ON(!intel_context_is_parent(ce));
2935 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
2936
2937 ret = pin_guc_id(guc, ce);
2938 if (unlikely(ret < 0))
2939 return ret;
2940
2941 return __guc_context_pin(ce, engine, vaddr);
2942 }
2943
2944 static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
2945 {
2946 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2947
2948 GEM_BUG_ON(!intel_context_is_child(ce));
2949 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
2950
2951 __intel_context_pin(ce->parallel.parent);
2952 return __guc_context_pin(ce, engine, vaddr);
2953 }
2954
2955 static void guc_parent_context_unpin(struct intel_context *ce)
2956 {
2957 struct intel_guc *guc = ce_to_guc(ce);
2958
2959 GEM_BUG_ON(context_enabled(ce));
2960 GEM_BUG_ON(intel_context_is_barrier(ce));
2961 GEM_BUG_ON(!intel_context_is_parent(ce));
2962 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
2963
2964 if (ce->parallel.last_rq)
2965 i915_request_put(ce->parallel.last_rq);
2966 unpin_guc_id(guc, ce);
2967 lrc_unpin(ce);
2968 }
2969
2970 static void guc_child_context_unpin(struct intel_context *ce)
2971 {
2972 GEM_BUG_ON(context_enabled(ce));
2973 GEM_BUG_ON(intel_context_is_barrier(ce));
2974 GEM_BUG_ON(!intel_context_is_child(ce));
2975 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
2976
2977 lrc_unpin(ce);
2978 }
2979
2980 static void guc_child_context_post_unpin(struct intel_context *ce)
2981 {
2982 GEM_BUG_ON(!intel_context_is_child(ce));
2983 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
2984 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
2985
2986 lrc_post_unpin(ce);
2987 intel_context_unpin(ce->parallel.parent);
2988 }
2989
2990 static void guc_child_context_destroy(struct kref *kref)
2991 {
2992 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2993
2994 __guc_context_destroy(ce);
2995 }
2996
2997 static const struct intel_context_ops virtual_parent_context_ops = {
2998 .alloc = guc_virtual_context_alloc,
2999
3000 .pre_pin = guc_context_pre_pin,
3001 .pin = guc_parent_context_pin,
3002 .unpin = guc_parent_context_unpin,
3003 .post_unpin = guc_context_post_unpin,
3004
3005 .ban = guc_context_ban,
3006
3007 .cancel_request = guc_context_cancel_request,
3008
3009 .enter = guc_virtual_context_enter,
3010 .exit = guc_virtual_context_exit,
3011
3012 .sched_disable = guc_context_sched_disable,
3013
3014 .destroy = guc_context_destroy,
3015
3016 .get_sibling = guc_virtual_get_sibling,
3017 };
3018
3019 static const struct intel_context_ops virtual_child_context_ops = {
3020 .alloc = guc_virtual_context_alloc,
3021
3022 .pre_pin = guc_context_pre_pin,
3023 .pin = guc_child_context_pin,
3024 .unpin = guc_child_context_unpin,
3025 .post_unpin = guc_child_context_post_unpin,
3026
3027 .cancel_request = guc_context_cancel_request,
3028
3029 .enter = guc_virtual_context_enter,
3030 .exit = guc_virtual_context_exit,
3031
3032 .destroy = guc_child_context_destroy,
3033
3034 .get_sibling = guc_virtual_get_sibling,
3035 };
3036
3037 /*
3038 * The below override of the breadcrumbs is enabled when the user configures a
3039 * context for parallel submission (multi-lrc, parent-child).
3040 *
3041 * The overridden breadcrumbs implements an algorithm which allows the GuC to
3042 * safely preempt all the hw contexts configured for parallel submission
3043 * between each BB. The contract between the i915 and the GuC is that if the
3044 * parent context can be preempted, all the children can be preempted as well,
3045 * and the GuC will always try to preempt the parent before the children. A
3046 * handshake between the parent / children breadcrumbs ensures the i915 holds
3047 * up its end of the deal, creating a window to preempt between each set of BBs.
3048 */
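/*
 * Roughly, each child's fini breadcrumb signals its join semaphore and then
 * waits on the parent's go semaphore, while the parent's fini breadcrumb
 * waits for all the children to join before signalling go, so the whole set
 * reaches a preemptible point together between consecutive sets of BBs.
 */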
3049 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
3050 u64 offset, u32 len,
3051 const unsigned int flags);
3052 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
3053 u64 offset, u32 len,
3054 const unsigned int flags);
3055 static u32 *
3056 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
3057 u32 *cs);
3058 static u32 *
3059 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
3060 u32 *cs);
3061
3062 static struct intel_context *
3063 guc_create_parallel(struct intel_engine_cs **engines,
3064 unsigned int num_siblings,
3065 unsigned int width)
3066 {
3067 struct intel_engine_cs **siblings = NULL;
3068 struct intel_context *parent = NULL, *ce, *err;
3069 int i, j;
3070
3071 siblings = kmalloc_array(num_siblings,
3072 sizeof(*siblings),
3073 GFP_KERNEL);
3074 if (!siblings)
3075 return ERR_PTR(-ENOMEM);
3076
3077 for (i = 0; i < width; ++i) {
3078 for (j = 0; j < num_siblings; ++j)
3079 siblings[j] = engines[i * num_siblings + j];
3080
3081 ce = intel_engine_create_virtual(siblings, num_siblings,
3082 FORCE_VIRTUAL);
3083 if (IS_ERR(ce)) {
3084 err = ERR_CAST(ce);
3085 goto unwind;
3086 }
3087
3088 if (i == 0) {
3089 parent = ce;
3090 parent->ops = &virtual_parent_context_ops;
3091 } else {
3092 ce->ops = &virtual_child_context_ops;
3093 intel_context_bind_parent_child(parent, ce);
3094 }
3095 }
3096
3097 parent->parallel.fence_context = dma_fence_context_alloc(1);
3098
3099 parent->engine->emit_bb_start =
3100 emit_bb_start_parent_no_preempt_mid_batch;
3101 parent->engine->emit_fini_breadcrumb =
3102 emit_fini_breadcrumb_parent_no_preempt_mid_batch;
3103 parent->engine->emit_fini_breadcrumb_dw =
3104 12 + 4 * parent->parallel.number_children;
3105 for_each_child(parent, ce) {
3106 ce->engine->emit_bb_start =
3107 emit_bb_start_child_no_preempt_mid_batch;
3108 ce->engine->emit_fini_breadcrumb =
3109 emit_fini_breadcrumb_child_no_preempt_mid_batch;
3110 ce->engine->emit_fini_breadcrumb_dw = 16;
3111 }
3112
3113 kfree(siblings);
3114 return parent;
3115
3116 unwind:
3117 if (parent)
3118 intel_context_put(parent);
3119 kfree(siblings);
3120 return err;
3121 }
3122
3123 static bool
3124 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
3125 {
3126 struct intel_engine_cs *sibling;
3127 intel_engine_mask_t tmp, mask = b->engine_mask;
3128 bool result = false;
3129
3130 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3131 result |= intel_engine_irq_enable(sibling);
3132
3133 return result;
3134 }
3135
3136 static void
3137 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
3138 {
3139 struct intel_engine_cs *sibling;
3140 intel_engine_mask_t tmp, mask = b->engine_mask;
3141
3142 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3143 intel_engine_irq_disable(sibling);
3144 }
3145
3146 static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
3147 {
3148 int i;
3149
3150 /*
3151 * In GuC submission mode we do not know which physical engine a request
3152 * will be scheduled on, this creates a problem because the breadcrumb
3153 * interrupt is per physical engine. To work around this we attach
3154 * requests to, and direct all breadcrumb interrupts to, the first instance
3155 * of an engine per class. In addition all breadcrumb interrupts are
3156 * enabled / disabled across an engine class in unison.
3157 */
3158 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
3159 struct intel_engine_cs *sibling =
3160 engine->gt->engine_class[engine->class][i];
3161
3162 if (sibling) {
3163 if (engine->breadcrumbs != sibling->breadcrumbs) {
3164 intel_breadcrumbs_put(engine->breadcrumbs);
3165 engine->breadcrumbs =
3166 intel_breadcrumbs_get(sibling->breadcrumbs);
3167 }
3168 break;
3169 }
3170 }
3171
3172 if (engine->breadcrumbs) {
3173 engine->breadcrumbs->engine_mask |= engine->mask;
3174 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
3175 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
3176 }
3177 }
3178
3179 static void guc_bump_inflight_request_prio(struct i915_request *rq,
3180 int prio)
3181 {
3182 struct intel_context *ce = request_to_scheduling_context(rq);
3183 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
3184
3185 /* Short circuit function */
3186 if (prio < I915_PRIORITY_NORMAL ||
3187 rq->guc_prio == GUC_PRIO_FINI ||
3188 (rq->guc_prio != GUC_PRIO_INIT &&
3189 !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
3190 return;
3191
3192 spin_lock(&ce->guc_state.lock);
3193 if (rq->guc_prio != GUC_PRIO_FINI) {
3194 if (rq->guc_prio != GUC_PRIO_INIT)
3195 sub_context_inflight_prio(ce, rq->guc_prio);
3196 rq->guc_prio = new_guc_prio;
3197 add_context_inflight_prio(ce, rq->guc_prio);
3198 update_context_prio(ce);
3199 }
3200 spin_unlock(&ce->guc_state.lock);
3201 }
3202
3203 static void guc_retire_inflight_request_prio(struct i915_request *rq)
3204 {
3205 struct intel_context *ce = request_to_scheduling_context(rq);
3206
3207 spin_lock(&ce->guc_state.lock);
3208 guc_prio_fini(rq, ce);
3209 spin_unlock(&ce->guc_state.lock);
3210 }
3211
3212 static void sanitize_hwsp(struct intel_engine_cs *engine)
3213 {
3214 struct intel_timeline *tl;
3215
3216 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
3217 intel_timeline_reset_seqno(tl);
3218 }
3219
3220 static void guc_sanitize(struct intel_engine_cs *engine)
3221 {
3222 /*
3223 * Poison residual state on resume, in case the suspend didn't!
3224 *
3225 * We have to assume that across suspend/resume (or other loss
3226 * of control) that the contents of our pinned buffers has been
3227 * lost, replaced by garbage. Since this doesn't always happen,
3228 * let's poison such state so that we more quickly spot when
3229 * we falsely assume it has been preserved.
3230 */
3231 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3232 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
3233
3234 /*
3235 * The kernel_context HWSP is stored in the status_page. As above,
3236 * that may be lost on resume/initialisation, and so we need to
3237 * reset the value in the HWSP.
3238 */
3239 sanitize_hwsp(engine);
3240
3241 /* And scrub the dirty cachelines for the HWSP */
3242 clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
3243
3244 intel_engine_reset_pinned_contexts(engine);
3245 }
3246
3247 static void setup_hwsp(struct intel_engine_cs *engine)
3248 {
3249 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3250
3251 ENGINE_WRITE_FW(engine,
3252 RING_HWS_PGA,
3253 i915_ggtt_offset(engine->status_page.vma));
3254 }
3255
3256 static void start_engine(struct intel_engine_cs *engine)
3257 {
3258 ENGINE_WRITE_FW(engine,
3259 RING_MODE_GEN7,
3260 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
3261
3262 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3263 ENGINE_POSTING_READ(engine, RING_MI_MODE);
3264 }
3265
3266 static int guc_resume(struct intel_engine_cs *engine)
3267 {
3268 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3269
3270 intel_mocs_init_engine(engine);
3271
3272 intel_breadcrumbs_reset(engine->breadcrumbs);
3273
3274 setup_hwsp(engine);
3275 start_engine(engine);
3276
3277 return 0;
3278 }
3279
3280 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
3281 {
3282 return !sched_engine->tasklet.callback;
3283 }
3284
3285 static void guc_set_default_submission(struct intel_engine_cs *engine)
3286 {
3287 engine->submit_request = guc_submit_request;
3288 }
3289
3290 static inline void guc_kernel_context_pin(struct intel_guc *guc,
3291 struct intel_context *ce)
3292 {
3293 if (context_guc_id_invalid(ce))
3294 pin_guc_id(guc, ce);
3295 guc_lrc_desc_pin(ce, true);
3296 }
3297
3298 static inline void guc_init_lrc_mapping(struct intel_guc *guc)
3299 {
3300 struct intel_gt *gt = guc_to_gt(guc);
3301 struct intel_engine_cs *engine;
3302 enum intel_engine_id id;
3303
3304 /* make sure all descriptors are clean... */
3305 xa_destroy(&guc->context_lookup);
3306
3307 /*
3308 * Some contexts might have been pinned before we enabled GuC
3309 * submission, so we need to add them to the GuC bookkeeping.
3310 * Also, after a reset of the GuC we want to make sure that the
3311 * information shared with GuC is properly reset. The kernel LRCs are
3312 * not attached to the gem_context, so they need to be added separately.
3313 *
3314 * Note: we purposefully do not check the return of guc_lrc_desc_pin,
3315 * because that function can only fail if a reset is just starting. This
3316 * is at the end of reset so presumably another reset isn't happening
3317 * and even if it did this code would be run again.
3318 */
3319
3320 for_each_engine(engine, gt, id) {
3321 struct intel_context *ce;
3322
3323 list_for_each_entry(ce, &engine->pinned_contexts_list,
3324 pinned_contexts_link)
3325 guc_kernel_context_pin(guc, ce);
3326 }
3327 }
3328
3329 static void guc_release(struct intel_engine_cs *engine)
3330 {
3331 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
3332
3333 intel_engine_cleanup_common(engine);
3334 lrc_fini_wa_ctx(engine);
3335 }
3336
3337 static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
3338 {
3339 struct intel_engine_cs *e;
3340 intel_engine_mask_t tmp, mask = engine->mask;
3341
3342 for_each_engine_masked(e, engine->gt, mask, tmp)
3343 e->serial++;
3344 }
3345
3346 static void guc_default_vfuncs(struct intel_engine_cs *engine)
3347 {
3348 /* Default vfuncs which can be overridden by each engine. */
3349
3350 engine->resume = guc_resume;
3351
3352 engine->cops = &guc_context_ops;
3353 engine->request_alloc = guc_request_alloc;
3354 engine->add_active_request = add_to_context;
3355 engine->remove_active_request = remove_from_context;
3356
3357 engine->sched_engine->schedule = i915_schedule;
3358
3359 engine->reset.prepare = guc_reset_nop;
3360 engine->reset.rewind = guc_rewind_nop;
3361 engine->reset.cancel = guc_reset_nop;
3362 engine->reset.finish = guc_reset_nop;
3363
3364 engine->emit_flush = gen8_emit_flush_xcs;
3365 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
3366 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
3367 if (GRAPHICS_VER(engine->i915) >= 12) {
3368 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
3369 engine->emit_flush = gen12_emit_flush_xcs;
3370 }
3371 engine->set_default_submission = guc_set_default_submission;
3372
3373 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
3374 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
3375
3376 /*
3377 * TODO: GuC supports timeslicing and semaphores as well, but they're
3378 * handled by the firmware so some minor tweaks are required before
3379 * enabling.
3380 *
3381 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
3382 */
3383
3384 engine->emit_bb_start = gen8_emit_bb_start;
3385 }
3386
3387 static void rcs_submission_override(struct intel_engine_cs *engine)
3388 {
3389 switch (GRAPHICS_VER(engine->i915)) {
3390 case 12:
3391 engine->emit_flush = gen12_emit_flush_rcs;
3392 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
3393 break;
3394 case 11:
3395 engine->emit_flush = gen11_emit_flush_rcs;
3396 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
3397 break;
3398 default:
3399 engine->emit_flush = gen8_emit_flush_rcs;
3400 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
3401 break;
3402 }
3403 }
3404
3405 static inline void guc_default_irqs(struct intel_engine_cs *engine)
3406 {
3407 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
3408 intel_engine_set_irq_handler(engine, cs_irq_handler);
3409 }
3410
3411 static void guc_sched_engine_destroy(struct kref *kref)
3412 {
3413 struct i915_sched_engine *sched_engine =
3414 container_of(kref, typeof(*sched_engine), ref);
3415 struct intel_guc *guc = sched_engine->private_data;
3416
3417 guc->sched_engine = NULL;
3418 tasklet_kill(&sched_engine->tasklet); /* flush the callback */
3419 kfree(sched_engine);
3420 }
3421
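/**
 * intel_guc_submission_setup - switch an engine over to GuC submission
 * @engine: the engine to configure
 *
 * Point the engine at the single GuC-owned i915_sched_engine (created on
 * first use), install the GuC submission vfuncs, irq handler and breadcrumbs,
 * and apply the RCS-specific overrides for the render class.
 *
 * Return: 0 on success, -ENOMEM if the scheduler engine cannot be allocated.
 */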
3422 int intel_guc_submission_setup(struct intel_engine_cs *engine)
3423 {
3424 struct drm_i915_private *i915 = engine->i915;
3425 struct intel_guc *guc = &engine->gt->uc.guc;
3426
3427 /*
3428 * The setup relies on several assumptions (e.g. irqs always enabled)
3429 * that are only valid on gen11+
3430 */
3431 GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
3432
3433 if (!guc->sched_engine) {
3434 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
3435 if (!guc->sched_engine)
3436 return -ENOMEM;
3437
3438 guc->sched_engine->schedule = i915_schedule;
3439 guc->sched_engine->disabled = guc_sched_engine_disabled;
3440 guc->sched_engine->private_data = guc;
3441 guc->sched_engine->destroy = guc_sched_engine_destroy;
3442 guc->sched_engine->bump_inflight_request_prio =
3443 guc_bump_inflight_request_prio;
3444 guc->sched_engine->retire_inflight_request_prio =
3445 guc_retire_inflight_request_prio;
3446 tasklet_setup(&guc->sched_engine->tasklet,
3447 guc_submission_tasklet);
3448 }
3449 i915_sched_engine_put(engine->sched_engine);
3450 engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
3451
3452 guc_default_vfuncs(engine);
3453 guc_default_irqs(engine);
3454 guc_init_breadcrumbs(engine);
3455
3456 if (engine->class == RENDER_CLASS)
3457 rcs_submission_override(engine);
3458
3459 lrc_init_wa_ctx(engine);
3460
3461 /* Finally, take ownership and responsibility for cleanup! */
3462 engine->sanitize = guc_sanitize;
3463 engine->release = guc_release;
3464
3465 return 0;
3466 }
3467
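/**
 * intel_guc_submission_enable - enable GuC submission
 * @guc: the GuC instance
 *
 * Re-register the already pinned (kernel) contexts with the GuC so that the
 * firmware's view of the LRC descriptors matches the driver's after a fresh
 * load or a reset.
 */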
3468 void intel_guc_submission_enable(struct intel_guc *guc)
3469 {
3470 guc_init_lrc_mapping(guc);
3471 }
3472
3473 void intel_guc_submission_disable(struct intel_guc *guc)
3474 {
3475 /* Note: By the time we're here, GuC may have already been reset */
3476 }
3477
3478 static bool __guc_submission_supported(struct intel_guc *guc)
3479 {
3480 /* GuC submission is unavailable for pre-Gen11 */
3481 return intel_guc_is_supported(guc) &&
3482 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
3483 }
3484
3485 static bool __guc_submission_selected(struct intel_guc *guc)
3486 {
3487 struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
3488
3489 if (!intel_guc_submission_is_supported(guc))
3490 return false;
3491
3492 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
3493 }
3494
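/**
 * intel_guc_submission_init_early - early GuC submission initialisation
 * @guc: the GuC instance
 *
 * Cache whether GuC submission is supported on this platform and whether the
 * user selected it via the enable_guc modparam.
 */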
3495 void intel_guc_submission_init_early(struct intel_guc *guc)
3496 {
3497 guc->submission_supported = __guc_submission_supported(guc);
3498 guc->submission_selected = __guc_submission_selected(guc);
3499 }
3500
3501 static inline struct intel_context *
3502 g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
3503 {
3504 struct intel_context *ce;
3505
3506 if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) {
3507 drm_err(&guc_to_gt(guc)->i915->drm,
3508 "Invalid desc_idx %u", desc_idx);
3509 return NULL;
3510 }
3511
3512 ce = __get_context(guc, desc_idx);
3513 if (unlikely(!ce)) {
3514 drm_err(&guc_to_gt(guc)->i915->drm,
3515 "Context is NULL, desc_idx %u", desc_idx);
3516 return NULL;
3517 }
3518
3519 if (unlikely(intel_context_is_child(ce))) {
3520 drm_err(&guc_to_gt(guc)->i915->drm,
3521 "Context is child, desc_idx %u", desc_idx);
3522 return NULL;
3523 }
3524
3525 return ce;
3526 }
3527
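/**
 * intel_guc_deregister_done_process_msg - handle a deregister-done G2H
 * @guc: the GuC instance
 * @msg: G2H payload; msg[0] is the context descriptor index
 * @len: payload length in dwords
 *
 * Completes a context deregistration: if this context was waiting to reuse
 * the guc_id, it is registered now and its submission fence signalled; if the
 * context was being destroyed, the guc_id is released and the context freed.
 * The outstanding G2H count is dropped in all cases.
 *
 * Return: 0 on success, -EPROTO on a malformed message.
 */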
3528 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
3529 const u32 *msg,
3530 u32 len)
3531 {
3532 struct intel_context *ce;
3533 u32 desc_idx = msg[0];
3534
3535 if (unlikely(len < 1)) {
3536 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
3537 return -EPROTO;
3538 }
3539
3540 ce = g2h_context_lookup(guc, desc_idx);
3541 if (unlikely(!ce))
3542 return -EPROTO;
3543
3544 trace_intel_context_deregister_done(ce);
3545
3546 #ifdef CONFIG_DRM_I915_SELFTEST
3547 if (unlikely(ce->drop_deregister)) {
3548 ce->drop_deregister = false;
3549 return 0;
3550 }
3551 #endif
3552
3553 if (context_wait_for_deregister_to_register(ce)) {
3554 struct intel_runtime_pm *runtime_pm =
3555 &ce->engine->gt->i915->runtime_pm;
3556 intel_wakeref_t wakeref;
3557
3558 /*
3559 * Previous owner of this guc_id has been deregistered, it is
3560 * now safe to register this context.
3561 */
3562 with_intel_runtime_pm(runtime_pm, wakeref)
3563 register_context(ce, true);
3564 guc_signal_context_fence(ce);
3565 intel_context_put(ce);
3566 } else if (context_destroyed(ce)) {
3567 /* Context has been destroyed */
3568 intel_gt_pm_put_async(guc_to_gt(guc));
3569 release_guc_id(guc, ce);
3570 __guc_context_destroy(ce);
3571 }
3572
3573 decr_outstanding_submission_g2h(guc);
3574
3575 return 0;
3576 }
3577
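/**
 * intel_guc_sched_done_process_msg - handle a schedule enable/disable done G2H
 * @guc: the GuC instance
 * @msg: G2H payload; msg[0] is the context descriptor index
 * @len: payload length in dwords
 *
 * Clears the pending enable state, or, for a completed disable, unpins the
 * context, signals any blocked fences and cancels outstanding requests if the
 * context was banned, then drops the outstanding G2H count.
 *
 * Return: 0 on success, -EPROTO on a malformed or unexpected message.
 */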
3578 int intel_guc_sched_done_process_msg(struct intel_guc *guc,
3579 const u32 *msg,
3580 u32 len)
3581 {
3582 struct intel_context *ce;
3583 unsigned long flags;
3584 u32 desc_idx = msg[0];
3585
3586 if (unlikely(len < 2)) {
3587 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
3588 return -EPROTO;
3589 }
3590
3591 ce = g2h_context_lookup(guc, desc_idx);
3592 if (unlikely(!ce))
3593 return -EPROTO;
3594
3595 if (unlikely(context_destroyed(ce) ||
3596 (!context_pending_enable(ce) &&
3597 !context_pending_disable(ce)))) {
3598 drm_err(&guc_to_gt(guc)->i915->drm,
3599 "Bad context sched_state 0x%x, desc_idx %u",
3600 ce->guc_state.sched_state, desc_idx);
3601 return -EPROTO;
3602 }
3603
3604 trace_intel_context_sched_done(ce);
3605
3606 if (context_pending_enable(ce)) {
3607 #ifdef CONFIG_DRM_I915_SELFTEST
3608 if (unlikely(ce->drop_schedule_enable)) {
3609 ce->drop_schedule_enable = false;
3610 return 0;
3611 }
3612 #endif
3613
3614 spin_lock_irqsave(&ce->guc_state.lock, flags);
3615 clr_context_pending_enable(ce);
3616 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3617 } else if (context_pending_disable(ce)) {
3618 bool banned;
3619
3620 #ifdef CONFIG_DRM_I915_SELFTEST
3621 if (unlikely(ce->drop_schedule_disable)) {
3622 ce->drop_schedule_disable = false;
3623 return 0;
3624 }
3625 #endif
3626
3627 /*
3628 * Unpin must be done before __guc_signal_context_fence,
3629 * otherwise a race exists where requests can be submitted and
3630 * retired before this unpin completes, resulting in the
3631 * pin_count dropping to zero while the context is still
3632 * enabled.
3633 */
3634 intel_context_sched_disable_unpin(ce);
3635
3636 spin_lock_irqsave(&ce->guc_state.lock, flags);
3637 banned = context_banned(ce);
3638 clr_context_banned(ce);
3639 clr_context_pending_disable(ce);
3640 __guc_signal_context_fence(ce);
3641 guc_blocked_fence_complete(ce);
3642 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3643
3644 if (banned) {
3645 guc_cancel_context_requests(ce);
3646 intel_engine_signal_breadcrumbs(ce->engine);
3647 }
3648 }
3649
3650 decr_outstanding_submission_g2h(guc);
3651 intel_context_put(ce);
3652
3653 return 0;
3654 }
3655
3656 static void capture_error_state(struct intel_guc *guc,
3657 struct intel_context *ce)
3658 {
3659 struct intel_gt *gt = guc_to_gt(guc);
3660 struct drm_i915_private *i915 = gt->i915;
3661 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
3662 intel_wakeref_t wakeref;
3663
3664 intel_engine_set_hung_context(engine, ce);
3665 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
3666 i915_capture_error_state(gt, engine->mask);
3667 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
3668 }
3669
3670 static void guc_context_replay(struct intel_context *ce)
3671 {
3672 struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
3673
3674 __guc_reset_context(ce, true);
3675 tasklet_hi_schedule(&sched_engine->tasklet);
3676 }
3677
3678 static void guc_handle_context_reset(struct intel_guc *guc,
3679 struct intel_context *ce)
3680 {
3681 trace_intel_context_reset(ce);
3682
3683 /*
3684 * XXX: Racey if request cancellation has occurred, see comment in
3685 * __guc_reset_context().
3686 */
3687 if (likely(!intel_context_is_banned(ce) &&
3688 !context_blocked(ce))) {
3689 capture_error_state(guc, ce);
3690 guc_context_replay(ce);
3691 }
3692 }
3693
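/**
 * intel_guc_context_reset_process_msg - handle a context reset notification G2H
 * @guc: the GuC instance
 * @msg: G2H payload; msg[0] is the context descriptor index
 * @len: payload length in dwords, must be 1
 *
 * Unless the context is banned or blocked, capture error state for the hung
 * context and replay its remaining requests.
 *
 * Return: 0 on success, -EPROTO on a malformed message.
 */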
3694 int intel_guc_context_reset_process_msg(struct intel_guc *guc,
3695 const u32 *msg, u32 len)
3696 {
3697 struct intel_context *ce;
3698 int desc_idx;
3699
3700 if (unlikely(len != 1)) {
3701 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
3702 return -EPROTO;
3703 }
3704
3705 desc_idx = msg[0];
3706 ce = g2h_context_lookup(guc, desc_idx);
3707 if (unlikely(!ce))
3708 return -EPROTO;
3709
3710 guc_handle_context_reset(guc, ce);
3711
3712 return 0;
3713 }
3714
3715 static struct intel_engine_cs *
3716 guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
3717 {
3718 struct intel_gt *gt = guc_to_gt(guc);
3719 u8 engine_class = guc_class_to_engine_class(guc_class);
3720
3721 /* Class index is checked in class converter */
3722 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
3723
3724 return gt->engine_class[engine_class][instance];
3725 }
3726
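/**
 * intel_guc_engine_failure_process_msg - handle an engine reset failure G2H
 * @guc: the GuC instance
 * @msg: G2H payload; msg[0] is the GuC class, msg[1] the instance and
 *	 msg[2] the failure reason
 * @len: payload length in dwords, must be 3
 *
 * The GuC could not reset the engine itself, so escalate to the i915 error
 * handler (intel_gt_handle_error()) with error capture.
 *
 * Return: 0 on success, -EPROTO on a malformed message.
 */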
3727 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
3728 const u32 *msg, u32 len)
3729 {
3730 struct intel_engine_cs *engine;
3731 u8 guc_class, instance;
3732 u32 reason;
3733
3734 if (unlikely(len != 3)) {
3735 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
3736 return -EPROTO;
3737 }
3738
3739 guc_class = msg[0];
3740 instance = msg[1];
3741 reason = msg[2];
3742
3743 engine = guc_lookup_engine(guc, guc_class, instance);
3744 if (unlikely(!engine)) {
3745 drm_err(&guc_to_gt(guc)->i915->drm,
3746 "Invalid engine %d:%d", guc_class, instance);
3747 return -EPROTO;
3748 }
3749
3750 intel_gt_handle_error(guc_to_gt(guc), engine->mask,
3751 I915_ERROR_CAPTURE,
3752 "GuC failed to reset %s (reason=0x%08x)\n",
3753 engine->name, reason);
3754
3755 return 0;
3756 }
3757
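/**
 * intel_guc_find_hung_context - find the context hung on an engine
 * @engine: the physical engine that reported the hang
 *
 * Walk every context registered with the GuC and, for the first pinned one
 * running on @engine with a request still active, mark it as the hung context
 * so the error capture code can pick it up. Bails out early if called before
 * GuC submission has been initialised (e.g. a reset during driver load).
 */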
3758 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
3759 {
3760 struct intel_guc *guc = &engine->gt->uc.guc;
3761 struct intel_context *ce;
3762 struct i915_request *rq;
3763 unsigned long index;
3764 unsigned long flags;
3765
3766 /* Reset called during driver load? GuC not yet initialised! */
3767 if (unlikely(!guc_submission_initialized(guc)))
3768 return;
3769
3770 xa_lock_irqsave(&guc->context_lookup, flags);
3771 xa_for_each(&guc->context_lookup, index, ce) {
3772 if (!kref_get_unless_zero(&ce->ref))
3773 continue;
3774
3775 xa_unlock(&guc->context_lookup);
3776
3777 if (!intel_context_is_pinned(ce))
3778 goto next;
3779
3780 if (intel_engine_is_virtual(ce->engine)) {
3781 if (!(ce->engine->mask & engine->mask))
3782 goto next;
3783 } else {
3784 if (ce->engine != engine)
3785 goto next;
3786 }
3787
3788 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
3789 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
3790 continue;
3791
3792 intel_engine_set_hung_context(engine, ce);
3793
3794 /* Can only cope with one hang at a time... */
3795 intel_context_put(ce);
3796 xa_lock(&guc->context_lookup);
3797 goto done;
3798 }
3799 next:
3800 intel_context_put(ce);
3801 xa_lock(&guc->context_lookup);
3802 }
3803 done:
3804 xa_unlock_irqrestore(&guc->context_lookup, flags);
3805 }
3806
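/**
 * intel_guc_dump_active_requests - dump active requests for an engine
 * @engine: the physical engine of interest
 * @hung_rq: the request identified as hung, highlighted in the dump
 * @m: the drm_printer to write to
 *
 * Walk every context registered with the GuC that runs on @engine and print
 * its active requests, for use by the engine dump / error capture paths.
 */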
3807 void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
3808 struct i915_request *hung_rq,
3809 struct drm_printer *m)
3810 {
3811 struct intel_guc *guc = &engine->gt->uc.guc;
3812 struct intel_context *ce;
3813 unsigned long index;
3814 unsigned long flags;
3815
3816 /* Reset called during driver load? GuC not yet initialised! */
3817 if (unlikely(!guc_submission_initialized(guc)))
3818 return;
3819
3820 xa_lock_irqsave(&guc->context_lookup, flags);
3821 xa_for_each(&guc->context_lookup, index, ce) {
3822 if (!kref_get_unless_zero(&ce->ref))
3823 continue;
3824
3825 xa_unlock(&guc->context_lookup);
3826
3827 if (!intel_context_is_pinned(ce))
3828 goto next;
3829
3830 if (intel_engine_is_virtual(ce->engine)) {
3831 if (!(ce->engine->mask & engine->mask))
3832 goto next;
3833 } else {
3834 if (ce->engine != engine)
3835 goto next;
3836 }
3837
3838 spin_lock(&ce->guc_state.lock);
3839 intel_engine_dump_active_requests(&ce->guc_state.requests,
3840 hung_rq, m);
3841 spin_unlock(&ce->guc_state.lock);
3842
3843 next:
3844 intel_context_put(ce);
3845 xa_lock(&guc->context_lookup);
3846 }
3847 xa_unlock_irqrestore(&guc->context_lookup, flags);
3848 }
3849
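/**
 * intel_guc_submission_print_info - print GuC submission scheduler state
 * @guc: the GuC instance
 * @p: the drm_printer to write to
 *
 * Print the number of outstanding G2H messages, the tasklet count and every
 * request currently sitting in the GuC submit tasklet's priority tree.
 *
 * Illustrative sketch only (e.g. from a debugfs hook), assuming a valid guc:
 *
 *	struct drm_printer p = drm_info_printer(guc_to_gt(guc)->i915->drm.dev);
 *
 *	intel_guc_submission_print_info(guc, &p);
 *	intel_guc_submission_print_context_info(guc, &p);
 */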
3850 void intel_guc_submission_print_info(struct intel_guc *guc,
3851 struct drm_printer *p)
3852 {
3853 struct i915_sched_engine *sched_engine = guc->sched_engine;
3854 struct rb_node *rb;
3855 unsigned long flags;
3856
3857 if (!sched_engine)
3858 return;
3859
3860 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
3861 atomic_read(&guc->outstanding_submission_g2h));
3862 drm_printf(p, "GuC tasklet count: %u\n\n",
3863 atomic_read(&sched_engine->tasklet.count));
3864
3865 spin_lock_irqsave(&sched_engine->lock, flags);
3866 drm_printf(p, "Requests in GuC submit tasklet:\n");
3867 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
3868 struct i915_priolist *pl = to_priolist(rb);
3869 struct i915_request *rq;
3870
3871 priolist_for_each_request(rq, pl)
3872 drm_printf(p, "guc_id=%u, seqno=%llu\n",
3873 rq->context->guc_id.id,
3874 rq->fence.seqno);
3875 }
3876 spin_unlock_irqrestore(&sched_engine->lock, flags);
3877 drm_printf(p, "\n");
3878 }
3879
3880 static inline void guc_log_context_priority(struct drm_printer *p,
3881 struct intel_context *ce)
3882 {
3883 int i;
3884
3885 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
3886 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
3887 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
3888 i < GUC_CLIENT_PRIORITY_NUM; ++i) {
3889 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
3890 i, ce->guc_state.prio_count[i]);
3891 }
3892 drm_printf(p, "\n");
3893 }
3894
3895 static inline void guc_log_context(struct drm_printer *p,
3896 struct intel_context *ce)
3897 {
3898 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
3899 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
3900 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
3901 ce->ring->head,
3902 ce->lrc_reg_state[CTX_RING_HEAD]);
3903 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
3904 ce->ring->tail,
3905 ce->lrc_reg_state[CTX_RING_TAIL]);
3906 drm_printf(p, "\t\tContext Pin Count: %u\n",
3907 atomic_read(&ce->pin_count));
3908 drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
3909 atomic_read(&ce->guc_id.ref));
3910 drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
3911 ce->guc_state.sched_state);
3912 }
3913
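/**
 * intel_guc_submission_print_context_info - print per-context GuC state
 * @guc: the GuC instance
 * @p: the drm_printer to write to
 *
 * For every context registered with the GuC, print its LRC descriptor state,
 * pin/refcounts and priority bands; for parallel parents also print the work
 * queue head/tail/status and the children's handshake values.
 */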
3914 void intel_guc_submission_print_context_info(struct intel_guc *guc,
3915 struct drm_printer *p)
3916 {
3917 struct intel_context *ce;
3918 unsigned long index;
3919 unsigned long flags;
3920
3921 xa_lock_irqsave(&guc->context_lookup, flags);
3922 xa_for_each(&guc->context_lookup, index, ce) {
3923 GEM_BUG_ON(intel_context_is_child(ce));
3924
3925 guc_log_context(p, ce);
3926 guc_log_context_priority(p, ce);
3927
3928 if (intel_context_is_parent(ce)) {
3929 struct guc_process_desc *desc = __get_process_desc(ce);
3930 struct intel_context *child;
3931
3932 drm_printf(p, "\t\tNumber children: %u\n",
3933 ce->parallel.number_children);
3934 drm_printf(p, "\t\tWQI Head: %u\n",
3935 READ_ONCE(desc->head));
3936 drm_printf(p, "\t\tWQI Tail: %u\n",
3937 READ_ONCE(desc->tail));
3938 drm_printf(p, "\t\tWQI Status: %u\n\n",
3939 READ_ONCE(desc->wq_status));
3940
3941 if (ce->engine->emit_bb_start ==
3942 emit_bb_start_parent_no_preempt_mid_batch) {
3943 u8 i;
3944
3945 drm_printf(p, "\t\tChildren Go: %u\n\n",
3946 get_children_go_value(ce));
3947 for (i = 0; i < ce->parallel.number_children; ++i)
3948 drm_printf(p, "\t\tChildren Join: %u\n",
3949 get_children_join_value(ce, i));
3950 }
3951
3952 for_each_child(ce, child)
3953 guc_log_context(p, child);
3954 }
3955 }
3956 xa_unlock_irqrestore(&guc->context_lookup, flags);
3957 }
3958
3959 static inline u32 get_children_go_addr(struct intel_context *ce)
3960 {
3961 GEM_BUG_ON(!intel_context_is_parent(ce));
3962
3963 return i915_ggtt_offset(ce->state) +
3964 __get_parent_scratch_offset(ce) +
3965 offsetof(struct parent_scratch, go.semaphore);
3966 }
3967
3968 static inline u32 get_children_join_addr(struct intel_context *ce,
3969 u8 child_index)
3970 {
3971 GEM_BUG_ON(!intel_context_is_parent(ce));
3972
3973 return i915_ggtt_offset(ce->state) +
3974 __get_parent_scratch_offset(ce) +
3975 offsetof(struct parent_scratch, join[child_index].semaphore);
3976 }
3977
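/*
 * Handshake values for the parent/child semaphores used by multi-LRC
 * (parallel) submission, all living in the parent's scratch page:
 *
 * - BB start: each child writes PARENT_GO_BB to its join semaphore, the
 *   parent waits for every join, disables preemption and then writes
 *   CHILD_GO_BB to the go semaphore that the children are polling, so all
 *   batches begin together.
 *
 * - Fini breadcrumb: the mirror image using the *_FINI_BREADCRUMB values,
 *   with preemption re-enabled before the final seqno writes.
 */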
3978 #define PARENT_GO_BB 1
3979 #define PARENT_GO_FINI_BREADCRUMB 0
3980 #define CHILD_GO_BB 1
3981 #define CHILD_GO_FINI_BREADCRUMB 0
3982 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
3983 u64 offset, u32 len,
3984 const unsigned int flags)
3985 {
3986 struct intel_context *ce = rq->context;
3987 u32 *cs;
3988 u8 i;
3989
3990 GEM_BUG_ON(!intel_context_is_parent(ce));
3991
3992 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
3993 if (IS_ERR(cs))
3994 return PTR_ERR(cs);
3995
3996 /* Wait on children */
3997 for (i = 0; i < ce->parallel.number_children; ++i) {
3998 *cs++ = (MI_SEMAPHORE_WAIT |
3999 MI_SEMAPHORE_GLOBAL_GTT |
4000 MI_SEMAPHORE_POLL |
4001 MI_SEMAPHORE_SAD_EQ_SDD);
4002 *cs++ = PARENT_GO_BB;
4003 *cs++ = get_children_join_addr(ce, i);
4004 *cs++ = 0;
4005 }
4006
4007 /* Turn off preemption */
4008 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4009 *cs++ = MI_NOOP;
4010
4011 /* Tell children go */
4012 cs = gen8_emit_ggtt_write(cs,
4013 CHILD_GO_BB,
4014 get_children_go_addr(ce),
4015 0);
4016
4017 /* Jump to batch */
4018 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4019 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4020 *cs++ = lower_32_bits(offset);
4021 *cs++ = upper_32_bits(offset);
4022 *cs++ = MI_NOOP;
4023
4024 intel_ring_advance(rq, cs);
4025
4026 return 0;
4027 }
4028
4029 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
4030 u64 offset, u32 len,
4031 const unsigned int flags)
4032 {
4033 struct intel_context *ce = rq->context;
4034 struct intel_context *parent = intel_context_to_parent(ce);
4035 u32 *cs;
4036
4037 GEM_BUG_ON(!intel_context_is_child(ce));
4038
4039 cs = intel_ring_begin(rq, 12);
4040 if (IS_ERR(cs))
4041 return PTR_ERR(cs);
4042
4043 /* Signal parent */
4044 cs = gen8_emit_ggtt_write(cs,
4045 PARENT_GO_BB,
4046 get_children_join_addr(parent,
4047 ce->parallel.child_index),
4048 0);
4049
4050 /* Wait on parent for go */
4051 *cs++ = (MI_SEMAPHORE_WAIT |
4052 MI_SEMAPHORE_GLOBAL_GTT |
4053 MI_SEMAPHORE_POLL |
4054 MI_SEMAPHORE_SAD_EQ_SDD);
4055 *cs++ = CHILD_GO_BB;
4056 *cs++ = get_children_go_addr(parent);
4057 *cs++ = 0;
4058
4059 /* Turn off preemption */
4060 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4061
4062 /* Jump to batch */
4063 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4064 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4065 *cs++ = lower_32_bits(offset);
4066 *cs++ = upper_32_bits(offset);
4067
4068 intel_ring_advance(rq, cs);
4069
4070 return 0;
4071 }
4072
4073 static u32 *
4074 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4075 u32 *cs)
4076 {
4077 struct intel_context *ce = rq->context;
4078 u8 i;
4079
4080 GEM_BUG_ON(!intel_context_is_parent(ce));
4081
4082 /* Wait on children */
4083 for (i = 0; i < ce->parallel.number_children; ++i) {
4084 *cs++ = (MI_SEMAPHORE_WAIT |
4085 MI_SEMAPHORE_GLOBAL_GTT |
4086 MI_SEMAPHORE_POLL |
4087 MI_SEMAPHORE_SAD_EQ_SDD);
4088 *cs++ = PARENT_GO_FINI_BREADCRUMB;
4089 *cs++ = get_children_join_addr(ce, i);
4090 *cs++ = 0;
4091 }
4092
4093 /* Turn on preemption */
4094 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4095 *cs++ = MI_NOOP;
4096
4097 /* Tell children go */
4098 cs = gen8_emit_ggtt_write(cs,
4099 CHILD_GO_FINI_BREADCRUMB,
4100 get_children_go_addr(ce),
4101 0);
4102
4103 return cs;
4104 }
4105
4106 /*
4107 * If this is true, a submission of multi-lrc requests had an error and the
4108 * requests need to be skipped. The front end (execbuf IOCTL) should've called
4109 * i915_request_skip which squashes the BB but we still need to emit the fini
4110 * breadcrumb seqno write. At this point we don't know how many of the
4111 * requests in the multi-lrc submission were generated so we can't do the
4112 * handshake between the parent and children (e.g. if 4 requests should be
4113 * generated but 2nd hit an error only 1 would be seen by the GuC backend).
4114 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
4115 * has occurred on any of the requests in submission / relationship.
4116 */
4117 static inline bool skip_handshake(struct i915_request *rq)
4118 {
4119 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
4120 }
4121
4122 static u32 *
4123 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4124 u32 *cs)
4125 {
4126 struct intel_context *ce = rq->context;
4127
4128 GEM_BUG_ON(!intel_context_is_parent(ce));
4129
4130 if (unlikely(skip_handshake(rq))) {
4131 /*
4132 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
4133 * the -6 comes from the length of the emits below.
4134 */
4135 memset(cs, 0, sizeof(u32) *
4136 (ce->engine->emit_fini_breadcrumb_dw - 6));
4137 cs += ce->engine->emit_fini_breadcrumb_dw - 6;
4138 } else {
4139 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
4140 }
4141
4142 /* Emit fini breadcrumb */
4143 cs = gen8_emit_ggtt_write(cs,
4144 rq->fence.seqno,
4145 i915_request_active_timeline(rq)->hwsp_offset,
4146 0);
4147
4148 /* User interrupt */
4149 *cs++ = MI_USER_INTERRUPT;
4150 *cs++ = MI_NOOP;
4151
4152 rq->tail = intel_ring_offset(rq, cs);
4153
4154 return cs;
4155 }
4156
4157 static u32 *
4158 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
4159 u32 *cs)
4160 {
4161 struct intel_context *ce = rq->context;
4162 struct intel_context *parent = intel_context_to_parent(ce);
4163
4164 GEM_BUG_ON(!intel_context_is_child(ce));
4165
4166 /* Turn on preemption */
4167 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4168 *cs++ = MI_NOOP;
4169
4170 /* Signal parent */
4171 cs = gen8_emit_ggtt_write(cs,
4172 PARENT_GO_FINI_BREADCRUMB,
4173 get_children_join_addr(parent,
4174 ce->parallel.child_index),
4175 0);
4176
4177 /* Wait on parent for go */
4178 *cs++ = (MI_SEMAPHORE_WAIT |
4179 MI_SEMAPHORE_GLOBAL_GTT |
4180 MI_SEMAPHORE_POLL |
4181 MI_SEMAPHORE_SAD_EQ_SDD);
4182 *cs++ = CHILD_GO_FINI_BREADCRUMB;
4183 *cs++ = get_children_go_addr(parent);
4184 *cs++ = 0;
4185
4186 return cs;
4187 }
4188
4189 static u32 *
4190 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
4191 u32 *cs)
4192 {
4193 struct intel_context *ce = rq->context;
4194
4195 GEM_BUG_ON(!intel_context_is_child(ce));
4196
4197 if (unlikely(skip_handshake(rq))) {
4198 /*
4199 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
4200 * the -6 comes from the length of the emits below.
4201 */
4202 memset(cs, 0, sizeof(u32) *
4203 (ce->engine->emit_fini_breadcrumb_dw - 6));
4204 cs += ce->engine->emit_fini_breadcrumb_dw - 6;
4205 } else {
4206 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
4207 }
4208
4209 /* Emit fini breadcrumb */
4210 cs = gen8_emit_ggtt_write(cs,
4211 rq->fence.seqno,
4212 i915_request_active_timeline(rq)->hwsp_offset,
4213 0);
4214
4215 /* User interrupt */
4216 *cs++ = MI_USER_INTERRUPT;
4217 *cs++ = MI_NOOP;
4218
4219 rq->tail = intel_ring_offset(rq, cs);
4220
4221 return cs;
4222 }
4223
4224 static struct intel_context *
4225 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
4226 unsigned long flags)
4227 {
4228 struct guc_virtual_engine *ve;
4229 struct intel_guc *guc;
4230 unsigned int n;
4231 int err;
4232
4233 ve = kzalloc(sizeof(*ve), GFP_KERNEL);
4234 if (!ve)
4235 return ERR_PTR(-ENOMEM);
4236
4237 guc = &siblings[0]->gt->uc.guc;
4238
4239 ve->base.i915 = siblings[0]->i915;
4240 ve->base.gt = siblings[0]->gt;
4241 ve->base.uncore = siblings[0]->uncore;
4242 ve->base.id = -1;
4243
4244 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
4245 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4246 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4247 ve->base.saturated = ALL_ENGINES;
4248
4249 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
4250
4251 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
4252
4253 ve->base.cops = &virtual_guc_context_ops;
4254 ve->base.request_alloc = guc_request_alloc;
4255 ve->base.bump_serial = virtual_guc_bump_serial;
4256
4257 ve->base.submit_request = guc_submit_request;
4258
4259 ve->base.flags = I915_ENGINE_IS_VIRTUAL;
4260
4261 intel_context_init(&ve->context, &ve->base);
4262
4263 for (n = 0; n < count; n++) {
4264 struct intel_engine_cs *sibling = siblings[n];
4265
4266 GEM_BUG_ON(!is_power_of_2(sibling->mask));
4267 if (sibling->mask & ve->base.mask) {
4268 DRM_DEBUG("duplicate %s entry in load balancer\n",
4269 sibling->name);
4270 err = -EINVAL;
4271 goto err_put;
4272 }
4273
4274 ve->base.mask |= sibling->mask;
4275 ve->base.logical_mask |= sibling->logical_mask;
4276
4277 if (n != 0 && ve->base.class != sibling->class) {
4278 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
4279 sibling->class, ve->base.class);
4280 err = -EINVAL;
4281 goto err_put;
4282 } else if (n == 0) {
4283 ve->base.class = sibling->class;
4284 ve->base.uabi_class = sibling->uabi_class;
4285 snprintf(ve->base.name, sizeof(ve->base.name),
4286 "v%dx%d", ve->base.class, count);
4287 ve->base.context_size = sibling->context_size;
4288
4289 ve->base.add_active_request =
4290 sibling->add_active_request;
4291 ve->base.remove_active_request =
4292 sibling->remove_active_request;
4293 ve->base.emit_bb_start = sibling->emit_bb_start;
4294 ve->base.emit_flush = sibling->emit_flush;
4295 ve->base.emit_init_breadcrumb =
4296 sibling->emit_init_breadcrumb;
4297 ve->base.emit_fini_breadcrumb =
4298 sibling->emit_fini_breadcrumb;
4299 ve->base.emit_fini_breadcrumb_dw =
4300 sibling->emit_fini_breadcrumb_dw;
4301 ve->base.breadcrumbs =
4302 intel_breadcrumbs_get(sibling->breadcrumbs);
4303
4304 ve->base.flags |= sibling->flags;
4305
4306 ve->base.props.timeslice_duration_ms =
4307 sibling->props.timeslice_duration_ms;
4308 ve->base.props.preempt_timeout_ms =
4309 sibling->props.preempt_timeout_ms;
4310 }
4311 }
4312
4313 return &ve->context;
4314
4315 err_put:
4316 intel_context_put(&ve->context);
4317 return ERR_PTR(err);
4318 }
4319
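/**
 * intel_guc_virtual_engine_has_heartbeat - check heartbeats on a virtual engine
 * @ve: the virtual engine
 *
 * Return: true if any physical engine backing @ve has a non-zero heartbeat
 * interval configured, false otherwise.
 */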
4320 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
4321 {
4322 struct intel_engine_cs *engine;
4323 intel_engine_mask_t tmp, mask = ve->mask;
4324
4325 for_each_engine_masked(engine, ve->gt, mask, tmp)
4326 if (READ_ONCE(engine->props.heartbeat_interval_ms))
4327 return true;
4328
4329 return false;
4330 }
4331
4332 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4333 #include "selftest_guc.c"
4334 #include "selftest_guc_multi_lrc.c"
4335 #endif
4336