1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2023-2024 Intel Corporation
4 */
5
6 #include <drm/drm_managed.h>
7
8 #include "abi/guc_actions_sriov_abi.h"
9
10 #include "xe_device.h"
11 #include "xe_gt.h"
12 #include "xe_gt_sriov_pf_config.h"
13 #include "xe_gt_sriov_pf_control.h"
14 #include "xe_gt_sriov_pf_helpers.h"
15 #include "xe_gt_sriov_pf_monitor.h"
16 #include "xe_gt_sriov_pf_service.h"
17 #include "xe_gt_sriov_printk.h"
18 #include "xe_guc_ct.h"
19 #include "xe_sriov.h"
20
/*
 * Map a GUC_PF_TRIGGER_VF_* command code to a short name used in log
 * messages. Codes not listed below yield "<unknown>".
 */
static const char *control_cmd_to_string(u32 cmd)
{
	const char *name = "<unknown>";

	switch (cmd) {
	case GUC_PF_TRIGGER_VF_PAUSE:
		name = "PAUSE";
		break;
	case GUC_PF_TRIGGER_VF_RESUME:
		name = "RESUME";
		break;
	case GUC_PF_TRIGGER_VF_STOP:
		name = "STOP";
		break;
	case GUC_PF_TRIGGER_VF_FLR_START:
		name = "FLR_START";
		break;
	case GUC_PF_TRIGGER_VF_FLR_FINISH:
		name = "FLR_FINISH";
		break;
	default:
		break;
	}

	return name;
}
38
/*
 * Build a PF2GUC_VF_CONTROL request for @vfid carrying @cmd and send it
 * over the GuC CT channel with a blocking send.
 *
 * Return: 0 or a negative error code from the send; any positive value
 * returned by the send is reported as -EPROTO.
 */
static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
{
	u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
	};
	int ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));

	if (ret > 0)
		return -EPROTO;

	return ret;
}
53
/*
 * Send control command @cmd for VF @vfid to the GuC of @gt.
 *
 * @vfid must not be PFID (asserted). The attempt is logged at verbose debug
 * level and any failure is logged as an error.
 *
 * Return: 0 on success or the negative error code from the GuC action.
 */
static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
{
	int err;

	xe_gt_assert(gt, vfid != PFID);
	xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
				vfid, control_cmd_to_string(cmd));

	err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
	if (unlikely(err))
		xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
				vfid, control_cmd_to_string(cmd), ERR_PTR(err));
	return err;
}
68
pf_send_vf_pause(struct xe_gt * gt,unsigned int vfid)69 static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
70 {
71 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
72 }
73
pf_send_vf_resume(struct xe_gt * gt,unsigned int vfid)74 static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
75 {
76 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
77 }
78
pf_send_vf_stop(struct xe_gt * gt,unsigned int vfid)79 static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
80 {
81 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
82 }
83
pf_send_vf_flr_start(struct xe_gt * gt,unsigned int vfid)84 static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
85 {
86 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
87 }
88
pf_send_vf_flr_finish(struct xe_gt * gt,unsigned int vfid)89 static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
90 {
91 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
92 }
93
94 /**
95 * DOC: The VF state machine
96 *
97 * The simplified VF state machine could be presented as::
98 *
99 * pause--------------------------o
100 * / |
101 * / v
102 * (READY)<------------------resume-----(PAUSED)
103 * ^ \ / /
104 * | \ / /
105 * | stop---->(STOPPED)<----stop /
106 * | / /
107 * | / /
108 * o--------<-----flr /
109 * \ /
110 * o------<--------------------flr
111 *
112 * Where:
113 *
114 * * READY - represents a state in which VF is fully operable
115 * * PAUSED - represents a state in which VF activity is temporarily suspended
116 * * STOPPED - represents a state in which VF activity is definitely halted
117 * * pause - represents a request to temporarily suspend VF activity
118 * * resume - represents a request to resume VF activity
119 * * stop - represents a request to definitely halt VF activity
120 * * flr - represents a request to perform VF FLR to restore VF activity
121 *
122 * However, each state transition requires additional steps that involve
123 * communication with GuC that might fail or be interrupted by other requests::
124 *
125 * .................................WIP....
126 * : :
127 * pause--------------------->PAUSE_WIP----------------------------o
128 * / : / \ : |
129 * / : o----<---stop flr--o : |
130 * / : | \ / | : V
131 * (READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
132 * ^ \ \ : | | : / /
133 * | \ \ : | | : / /
134 * | \ \ : | | : / /
135 * | \ \ : o----<----------------------+--<-------stop /
136 * | \ \ : | | : /
137 * | \ \ : V | : /
138 * | \ stop----->STOP_WIP---------flr--->-----o : /
139 * | \ : | | : /
140 * | \ : | V : /
141 * | flr--------+----->----------------->FLR_WIP<-----flr
142 * | : | / ^ :
143 * | : | / | :
144 * o--------<-------:----+-----<----------------o | :
145 * : | | :
146 * :....|...........................|.....:
147 * | |
148 * V |
149 * (STOPPED)--------------------flr
150 *
151 * For details about each internal WIP state machine see:
152 *
153 * * `The VF PAUSE state machine`_
154 * * `The VF RESUME state machine`_
155 * * `The VF STOP state machine`_
156 * * `The VF FLR state machine`_
157 */
158
#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
/* Expands to a switch case returning the XE_GT_SRIOV_STATE_ suffix as a string. */
#define CASE2STR(_X) \
	case XE_GT_SRIOV_STATE_##_X: return #_X

/*
 * Name a single VF control state bit for debug messages.
 * Bits without an entry below are reported as "?".
 */
static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
{
	switch (bit) {
	/* top-level work-in-progress marker */
	CASE2STR(WIP);
	/* FLR sequence */
	CASE2STR(FLR_WIP);
	CASE2STR(FLR_SEND_START);
	CASE2STR(FLR_WAIT_GUC);
	CASE2STR(FLR_GUC_DONE);
	CASE2STR(FLR_RESET_CONFIG);
	CASE2STR(FLR_RESET_DATA);
	CASE2STR(FLR_RESET_MMIO);
	CASE2STR(FLR_SEND_FINISH);
	CASE2STR(FLR_FAILED);
	/* PAUSE sequence */
	CASE2STR(PAUSE_WIP);
	CASE2STR(PAUSE_SEND_PAUSE);
	CASE2STR(PAUSE_WAIT_GUC);
	CASE2STR(PAUSE_GUC_DONE);
	CASE2STR(PAUSE_FAILED);
	CASE2STR(PAUSED);
	/* RESUME sequence */
	CASE2STR(RESUME_WIP);
	CASE2STR(RESUME_SEND_RESUME);
	CASE2STR(RESUME_FAILED);
	CASE2STR(RESUMED);
	/* STOP sequence */
	CASE2STR(STOP_WIP);
	CASE2STR(STOP_SEND_STOP);
	CASE2STR(STOP_FAILED);
	CASE2STR(STOPPED);
	CASE2STR(MISMATCH);
	default:
		return "?";
	}
}

#undef CASE2STR
#endif
195
pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)196 static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
197 {
198 switch (bit) {
199 case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
200 case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
201 return HZ / 2;
202 case XE_GT_SRIOV_STATE_FLR_WIP:
203 case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
204 return 5 * HZ;
205 default:
206 return HZ;
207 }
208 }
209
pf_pick_vf_control(struct xe_gt * gt,unsigned int vfid)210 static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
211 {
212 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
213 xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
214
215 return >->sriov.pf.vfs[vfid].control;
216 }
217
pf_peek_vf_state(struct xe_gt * gt,unsigned int vfid)218 static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
219 {
220 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
221
222 return &cs->state;
223 }
224
pf_check_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)225 static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
226 enum xe_gt_sriov_control_bits bit)
227 {
228 return test_bit(bit, pf_peek_vf_state(gt, vfid));
229 }
230
pf_dump_vf_state(struct xe_gt * gt,unsigned int vfid)231 static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
232 {
233 unsigned long state = *pf_peek_vf_state(gt, vfid);
234 enum xe_gt_sriov_control_bits bit;
235
236 if (state) {
237 xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
238 vfid, state, state ? " bits " : "",
239 (int)BITS_PER_LONG, &state);
240 for_each_set_bit(bit, &state, BITS_PER_LONG)
241 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
242 vfid, control_bit_to_string(bit), bit);
243 } else {
244 xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
245 }
246 }
247
pf_expect_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)248 static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
249 enum xe_gt_sriov_control_bits bit)
250 {
251 bool result = pf_check_vf_state(gt, vfid, bit);
252
253 if (unlikely(!result))
254 pf_dump_vf_state(gt, vfid);
255
256 return result;
257 }
258
pf_expect_vf_not_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)259 static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
260 enum xe_gt_sriov_control_bits bit)
261 {
262 bool result = !pf_check_vf_state(gt, vfid, bit);
263
264 if (unlikely(!result))
265 pf_dump_vf_state(gt, vfid);
266
267 return result;
268 }
269
pf_enter_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)270 static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
271 enum xe_gt_sriov_control_bits bit)
272 {
273 if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
274 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n",
275 vfid, control_bit_to_string(bit), bit);
276 return true;
277 }
278 return false;
279 }
280
pf_exit_vf_state(struct xe_gt * gt,unsigned int vfid,enum xe_gt_sriov_control_bits bit)281 static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
282 enum xe_gt_sriov_control_bits bit)
283 {
284 if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
285 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n",
286 vfid, control_bit_to_string(bit), bit);
287 return true;
288 }
289 return false;
290 }
291
/*
 * Clear state @bit and, if it was actually set, log at verbose debug level
 * the return address of this function as the place that escaped it.
 */
static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
			       enum xe_gt_sriov_control_bits bit)
{
	if (!pf_exit_vf_state(gt, vfid, bit))
		return;

	xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
				vfid, control_bit_to_string(bit), bit,
				__builtin_return_address(0));
}
300
pf_enter_vf_mismatch(struct xe_gt * gt,unsigned int vfid)301 static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
302 {
303 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
304 xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
305 vfid, __builtin_return_address(0));
306 pf_dump_vf_state(gt, vfid);
307 }
308 }
309
pf_exit_vf_mismatch(struct xe_gt * gt,unsigned int vfid)310 static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
311 {
312 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
313 xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
314 vfid, __builtin_return_address(0));
315
316 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
317 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
318 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
319 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
320 }
321
/*
 * Used where a state transition finds the VF state bits in an unexpected
 * combination: flags the VF with the MISMATCH state.
 */
#define pf_enter_vf_state_machine_bug(gt, vfid) ({	\
	pf_enter_vf_mismatch((gt), (vfid));		\
})
325
pf_queue_control_worker(struct xe_gt * gt)326 static void pf_queue_control_worker(struct xe_gt *gt)
327 {
328 struct xe_device *xe = gt_to_xe(gt);
329
330 xe_gt_assert(gt, IS_SRIOV_PF(xe));
331
332 queue_work(xe->sriov.wq, >->sriov.pf.control.worker);
333 }
334
pf_queue_vf(struct xe_gt * gt,unsigned int vfid)335 static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
336 {
337 struct xe_gt_sriov_pf_control *pfc = >->sriov.pf.control;
338
339 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
340
341 spin_lock(&pfc->lock);
342 list_move_tail(>->sriov.pf.vfs[vfid].control.link, &pfc->list);
343 spin_unlock(&pfc->lock);
344
345 pf_queue_control_worker(gt);
346 }
347
/* Defined further below; needed early because pf_exit_vf_wip() calls them. */
static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
352
pf_enter_vf_wip(struct xe_gt * gt,unsigned int vfid)353 static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
354 {
355 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
356 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
357
358 reinit_completion(&cs->done);
359 return true;
360 }
361 return false;
362 }
363
pf_exit_vf_wip(struct xe_gt * gt,unsigned int vfid)364 static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
365 {
366 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
367 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
368
369 pf_exit_vf_flr_wip(gt, vfid);
370 pf_exit_vf_stop_wip(gt, vfid);
371 pf_exit_vf_pause_wip(gt, vfid);
372 pf_exit_vf_resume_wip(gt, vfid);
373
374 complete_all(&cs->done);
375 }
376 }
377
pf_wait_vf_wip_done(struct xe_gt * gt,unsigned int vfid,unsigned long timeout)378 static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
379 {
380 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
381
382 return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
383 }
384
pf_enter_vf_ready(struct xe_gt * gt,unsigned int vfid)385 static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
386 {
387 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
388 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
389 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
390 pf_exit_vf_mismatch(gt, vfid);
391 pf_exit_vf_wip(gt, vfid);
392 }
393
394 /**
395 * DOC: The VF PAUSE state machine
396 *
397 * The VF PAUSE state machine looks like::
398 *
399 * (READY,RESUMED)<-------------<---------------------o---------o
400 * | \ \
401 * pause \ \
402 * | \ \
403 * ....V...........................PAUSE_WIP........ \ \
404 * : \ : o \
405 * : \ o------<-----busy : | \
406 * : \ / / : | |
407 * : PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED) |
408 * : | \ : | |
409 * : acked rejected---->----------o--->(MISMATCH) /
410 * : | : /
411 * : v : /
412 * : PAUSE_WAIT_GUC : /
413 * : | : /
414 * : done : /
415 * : | : /
416 * : v : /
417 * : PAUSE_GUC_DONE o-----restart
418 * : / :
419 * : / :
420 * :....o..............o...............o...........:
421 * | | |
422 * completed flr stop
423 * | | |
424 * V .....V..... ......V.....
425 * (PAUSED) : FLR_WIP : : STOP_WIP :
426 * :.........: :..........:
427 *
428 * For the full state machine view, see `The VF state machine`_.
429 */
430
pf_exit_vf_pause_wip(struct xe_gt * gt,unsigned int vfid)431 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
432 {
433 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
434 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
435 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
436 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
437 }
438 }
439
pf_enter_vf_paused(struct xe_gt * gt,unsigned int vfid)440 static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
441 {
442 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
443 pf_enter_vf_state_machine_bug(gt, vfid);
444
445 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
446 pf_exit_vf_mismatch(gt, vfid);
447 pf_exit_vf_wip(gt, vfid);
448 }
449
/* PAUSE sequence completed: the VF moves to PAUSED. */
static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_paused(gt, vfid);
}
454
pf_enter_vf_pause_failed(struct xe_gt * gt,unsigned int vfid)455 static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
456 {
457 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
458 pf_exit_vf_wip(gt, vfid);
459 }
460
/* PAUSE request was rejected: flag MISMATCH, then handle it as a failure. */
static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);

	pf_enter_vf_pause_failed(gt, vfid);
}
466
pf_exit_vf_pause_guc_done(struct xe_gt * gt,unsigned int vfid)467 static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
468 {
469 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
470 return false;
471
472 pf_enter_vf_pause_completed(gt, vfid);
473 return true;
474 }
475
pf_enter_vf_pause_guc_done(struct xe_gt * gt,unsigned int vfid)476 static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
477 {
478 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
479 pf_queue_vf(gt, vfid);
480 }
481
pf_enter_pause_wait_guc(struct xe_gt * gt,unsigned int vfid)482 static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
483 {
484 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
485 pf_enter_vf_state_machine_bug(gt, vfid);
486 }
487
pf_exit_pause_wait_guc(struct xe_gt * gt,unsigned int vfid)488 static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
489 {
490 return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
491 }
492
pf_enter_vf_pause_send_pause(struct xe_gt * gt,unsigned int vfid)493 static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
494 {
495 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
496 pf_enter_vf_state_machine_bug(gt, vfid);
497
498 pf_queue_vf(gt, vfid);
499 }
500
pf_exit_vf_pause_send_pause(struct xe_gt * gt,unsigned int vfid)501 static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
502 {
503 int err;
504
505 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
506 return false;
507
508 /* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
509 pf_enter_pause_wait_guc(gt, vfid);
510
511 err = pf_send_vf_pause(gt, vfid);
512 if (err) {
513 /* send failed, so we shouldn't expect PAUSE_DONE from GuC */
514 pf_exit_pause_wait_guc(gt, vfid);
515
516 if (err == -EBUSY)
517 pf_enter_vf_pause_send_pause(gt, vfid);
518 else if (err == -EIO)
519 pf_enter_vf_pause_rejected(gt, vfid);
520 else
521 pf_enter_vf_pause_failed(gt, vfid);
522 } else {
523 /*
524 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
525 * but since GuC didn't complain, we may clear MISMATCH
526 */
527 pf_exit_vf_mismatch(gt, vfid);
528 }
529
530 return true;
531 }
532
pf_enter_vf_pause_wip(struct xe_gt * gt,unsigned int vfid)533 static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
534 {
535 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
536 pf_enter_vf_wip(gt, vfid);
537 pf_enter_vf_pause_send_pause(gt, vfid);
538 return true;
539 }
540
541 return false;
542 }
543
544 /**
545 * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
546 * @gt: the &xe_gt
547 * @vfid: the VF identifier
548 *
549 * This function is for PF only.
550 *
551 * Return: 0 on success or a negative error code on failure.
552 */
xe_gt_sriov_pf_control_pause_vf(struct xe_gt * gt,unsigned int vfid)553 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
554 {
555 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
556 int err;
557
558 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
559 xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
560 return -EPERM;
561 }
562
563 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
564 xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
565 return -ESTALE;
566 }
567
568 if (!pf_enter_vf_pause_wip(gt, vfid)) {
569 xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
570 return -EALREADY;
571 }
572
573 err = pf_wait_vf_wip_done(gt, vfid, timeout);
574 if (err) {
575 xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
576 vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
577 return err;
578 }
579
580 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
581 xe_gt_sriov_info(gt, "VF%u paused!\n", vfid);
582 return 0;
583 }
584
585 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
586 xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
587 return -EIO;
588 }
589
590 xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
591 return -ECANCELED;
592 }
593
594 /**
595 * DOC: The VF RESUME state machine
596 *
597 * The VF RESUME state machine looks like::
598 *
599 * (PAUSED)<-----------------<------------------------o
600 * | \
601 * resume \
602 * | \
603 * ....V............................RESUME_WIP...... \
604 * : \ : o
605 * : \ o-------<-----busy : |
606 * : \ / / : |
607 * : RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED)
608 * : / \ : |
609 * : acked rejected---->---------o--->(MISMATCH)
610 * : / :
611 * :....o..............o...............o.....o.....:
612 * | | | \
613 * completed flr stop restart-->(READY)
614 * | | |
615 * V .....V..... ......V.....
616 * (RESUMED) : FLR_WIP : : STOP_WIP :
617 * :.........: :..........:
618 *
619 * For the full state machine view, see `The VF state machine`_.
620 */
621
pf_exit_vf_resume_wip(struct xe_gt * gt,unsigned int vfid)622 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
623 {
624 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
625 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
626 }
627
pf_enter_vf_resumed(struct xe_gt * gt,unsigned int vfid)628 static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
629 {
630 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
631 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
632 pf_exit_vf_mismatch(gt, vfid);
633 pf_exit_vf_wip(gt, vfid);
634 }
635
/* RESUME sequence completed: the VF moves to RESUMED. */
static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_resumed(gt, vfid);
}
640
pf_enter_vf_resume_failed(struct xe_gt * gt,unsigned int vfid)641 static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
642 {
643 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
644 pf_exit_vf_wip(gt, vfid);
645 }
646
/* RESUME request was rejected: flag MISMATCH, then handle it as a failure. */
static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);

	pf_enter_vf_resume_failed(gt, vfid);
}
652
pf_enter_vf_resume_send_resume(struct xe_gt * gt,unsigned int vfid)653 static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
654 {
655 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
656 pf_enter_vf_state_machine_bug(gt, vfid);
657
658 pf_queue_vf(gt, vfid);
659 }
660
pf_exit_vf_resume_send_resume(struct xe_gt * gt,unsigned int vfid)661 static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
662 {
663 int err;
664
665 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
666 return false;
667
668 err = pf_send_vf_resume(gt, vfid);
669 if (err == -EBUSY)
670 pf_enter_vf_resume_send_resume(gt, vfid);
671 else if (err == -EIO)
672 pf_enter_vf_resume_rejected(gt, vfid);
673 else if (err)
674 pf_enter_vf_resume_failed(gt, vfid);
675 else
676 pf_enter_vf_resume_completed(gt, vfid);
677 return true;
678 }
679
pf_enter_vf_resume_wip(struct xe_gt * gt,unsigned int vfid)680 static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
681 {
682 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
683 pf_enter_vf_wip(gt, vfid);
684 pf_enter_vf_resume_send_resume(gt, vfid);
685 return true;
686 }
687
688 return false;
689 }
690
691 /**
692 * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
693 * @gt: the &xe_gt
694 * @vfid: the VF identifier
695 *
696 * This function is for PF only.
697 *
698 * Return: 0 on success or a negative error code on failure.
699 */
xe_gt_sriov_pf_control_resume_vf(struct xe_gt * gt,unsigned int vfid)700 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
701 {
702 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
703 int err;
704
705 if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
706 xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
707 return -EPERM;
708 }
709
710 if (!pf_enter_vf_resume_wip(gt, vfid)) {
711 xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
712 return -EALREADY;
713 }
714
715 err = pf_wait_vf_wip_done(gt, vfid, timeout);
716 if (err)
717 return err;
718
719 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
720 xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid);
721 return 0;
722 }
723
724 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
725 xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
726 return -EIO;
727 }
728
729 xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
730 return -ECANCELED;
731 }
732
733 /**
734 * DOC: The VF STOP state machine
735 *
736 * The VF STOP state machine looks like::
737 *
738 * (READY,PAUSED,RESUMED)<-------<--------------------o
739 * | \
740 * stop \
741 * | \
742 * ....V..............................STOP_WIP...... \
743 * : \ : o
744 * : \ o----<----busy : |
745 * : \ / / : |
746 * : STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED)
747 * : / \ : |
748 * : acked rejected-------->--------o--->(MISMATCH)
749 * : / :
750 * :....o..............o...............o...........:
751 * | | |
752 * completed flr restart
753 * | | |
754 * V .....V..... V
755 * (STOPPED) : FLR_WIP : (READY)
756 * :.........:
757 *
758 * For the full state machine view, see `The VF state machine`_.
759 */
760
pf_exit_vf_stop_wip(struct xe_gt * gt,unsigned int vfid)761 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
762 {
763 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
764 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
765 }
766
pf_enter_vf_stopped(struct xe_gt * gt,unsigned int vfid)767 static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
768 {
769 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
770 pf_enter_vf_state_machine_bug(gt, vfid);
771
772 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
773 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
774 pf_exit_vf_mismatch(gt, vfid);
775 pf_exit_vf_wip(gt, vfid);
776 }
777
/* STOP sequence completed: the VF moves to STOPPED. */
static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_stopped(gt, vfid);
}
782
pf_enter_vf_stop_failed(struct xe_gt * gt,unsigned int vfid)783 static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
784 {
785 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
786 pf_exit_vf_wip(gt, vfid);
787 }
788
/* STOP request was rejected: flag MISMATCH, then handle it as a failure. */
static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);

	pf_enter_vf_stop_failed(gt, vfid);
}
794
pf_enter_vf_stop_send_stop(struct xe_gt * gt,unsigned int vfid)795 static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
796 {
797 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
798 pf_enter_vf_state_machine_bug(gt, vfid);
799
800 pf_queue_vf(gt, vfid);
801 }
802
pf_exit_vf_stop_send_stop(struct xe_gt * gt,unsigned int vfid)803 static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
804 {
805 int err;
806
807 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
808 return false;
809
810 err = pf_send_vf_stop(gt, vfid);
811 if (err == -EBUSY)
812 pf_enter_vf_stop_send_stop(gt, vfid);
813 else if (err == -EIO)
814 pf_enter_vf_stop_rejected(gt, vfid);
815 else if (err)
816 pf_enter_vf_stop_failed(gt, vfid);
817 else
818 pf_enter_vf_stop_completed(gt, vfid);
819 return true;
820 }
821
pf_enter_vf_stop_wip(struct xe_gt * gt,unsigned int vfid)822 static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
823 {
824 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
825 pf_enter_vf_wip(gt, vfid);
826 pf_enter_vf_stop_send_stop(gt, vfid);
827 return true;
828 }
829 return false;
830 }
831
832 /**
833 * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
834 * @gt: the &xe_gt
835 * @vfid: the VF identifier
836 *
837 * This function is for PF only.
838 *
839 * Return: 0 on success or a negative error code on failure.
840 */
xe_gt_sriov_pf_control_stop_vf(struct xe_gt * gt,unsigned int vfid)841 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
842 {
843 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
844 int err;
845
846 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
847 xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
848 return -ESTALE;
849 }
850
851 if (!pf_enter_vf_stop_wip(gt, vfid)) {
852 xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
853 return -EALREADY;
854 }
855
856 err = pf_wait_vf_wip_done(gt, vfid, timeout);
857 if (err)
858 return err;
859
860 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
861 xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid);
862 return 0;
863 }
864
865 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
866 xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
867 return -EIO;
868 }
869
870 xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
871 return -ECANCELED;
872 }
873
874 /**
875 * DOC: The VF FLR state machine
876 *
877 * The VF FLR state machine looks like::
878 *
879 * (READY,PAUSED,STOPPED)<------------<--------------o
880 * | \
881 * flr \
882 * | \
883 * ....V..........................FLR_WIP........... \
884 * : \ : \
885 * : \ o----<----busy : |
886 * : \ / / : |
887 * : FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
888 * : | \ : | |
889 * : acked rejected----->-----------o--->(MISMATCH) |
890 * : | : ^ |
891 * : v : | |
892 * : FLR_WAIT_GUC : | |
893 * : | : | |
894 * : done : | |
895 * : | : | |
896 * : v : | |
897 * : FLR_GUC_DONE : | |
898 * : | : | |
899 * : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
900 * : | : | |
901 * : FLR_RESET_DATA : | |
902 * : | : | |
903 * : FLR_RESET_MMIO : | |
904 * : | : | |
905 * : | o----<----busy : | |
906 * : |/ / : | |
907 * : FLR_SEND_FINISH----failed--->-----------o--------+-----------o
908 * : / \ : |
909 * : acked rejected----->-----------o--------o
910 * : / :
911 * :....o..............................o...........:
912 * | |
913 * completed restart
914 * | /
915 * V /
916 * (READY)<----------<------------o
917 *
918 * For the full state machine view, see `The VF state machine`_.
919 */
920
pf_enter_vf_flr_send_start(struct xe_gt * gt,unsigned int vfid)921 static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
922 {
923 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
924 pf_enter_vf_state_machine_bug(gt, vfid);
925
926 pf_queue_vf(gt, vfid);
927 }
928
pf_enter_vf_flr_wip(struct xe_gt * gt,unsigned int vfid)929 static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
930 {
931 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
932 xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
933 return;
934 }
935
936 pf_enter_vf_wip(gt, vfid);
937 pf_enter_vf_flr_send_start(gt, vfid);
938 }
939
pf_exit_vf_flr_wip(struct xe_gt * gt,unsigned int vfid)940 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
941 {
942 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
943 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
944 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
945 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
946 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
947 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
948 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
949 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
950 }
951 }
952
/* FLR finished successfully: hand the VF over to pf_enter_vf_ready(). */
static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_ready(gt, vfid);
}
957
pf_enter_vf_flr_failed(struct xe_gt * gt,unsigned int vfid)958 static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
959 {
960 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
961 xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
962 pf_exit_vf_wip(gt, vfid);
963 }
964
/*
 * The FLR request was rejected: flag the VF through pf_enter_vf_mismatch()
 * first, then treat the FLR as failed.
 */
static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_flr_failed(gt, vfid);
}
970
pf_enter_vf_flr_send_finish(struct xe_gt * gt,unsigned int vfid)971 static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
972 {
973 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
974 pf_enter_vf_state_machine_bug(gt, vfid);
975
976 pf_queue_vf(gt, vfid);
977 }
978
pf_exit_vf_flr_send_finish(struct xe_gt * gt,unsigned int vfid)979 static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
980 {
981 int err;
982
983 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
984 return false;
985
986 err = pf_send_vf_flr_finish(gt, vfid);
987 if (err == -EBUSY)
988 pf_enter_vf_flr_send_finish(gt, vfid);
989 else if (err == -EIO)
990 pf_enter_vf_flr_rejected(gt, vfid);
991 else if (err)
992 pf_enter_vf_flr_failed(gt, vfid);
993 else
994 pf_enter_vf_flr_completed(gt, vfid);
995 return true;
996 }
997
pf_enter_vf_flr_reset_mmio(struct xe_gt * gt,unsigned int vfid)998 static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
999 {
1000 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1001 pf_enter_vf_state_machine_bug(gt, vfid);
1002
1003 pf_queue_vf(gt, vfid);
1004 }
1005
pf_exit_vf_flr_reset_mmio(struct xe_gt * gt,unsigned int vfid)1006 static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
1007 {
1008 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
1009 return false;
1010
1011 /* XXX: placeholder */
1012
1013 pf_enter_vf_flr_send_finish(gt, vfid);
1014 return true;
1015 }
1016
pf_enter_vf_flr_reset_data(struct xe_gt * gt,unsigned int vfid)1017 static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1018 {
1019 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1020 pf_enter_vf_state_machine_bug(gt, vfid);
1021
1022 pf_queue_vf(gt, vfid);
1023 }
1024
pf_exit_vf_flr_reset_data(struct xe_gt * gt,unsigned int vfid)1025 static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
1026 {
1027 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
1028 return false;
1029
1030 xe_gt_sriov_pf_service_reset(gt, vfid);
1031 xe_gt_sriov_pf_monitor_flr(gt, vfid);
1032
1033 pf_enter_vf_flr_reset_mmio(gt, vfid);
1034 return true;
1035 }
1036
pf_enter_vf_flr_reset_config(struct xe_gt * gt,unsigned int vfid)1037 static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1038 {
1039 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1040 pf_enter_vf_state_machine_bug(gt, vfid);
1041
1042 pf_queue_vf(gt, vfid);
1043 }
1044
pf_exit_vf_flr_reset_config(struct xe_gt * gt,unsigned int vfid)1045 static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
1046 {
1047 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
1048 int err;
1049
1050 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
1051 return false;
1052
1053 err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
1054 if (err)
1055 pf_enter_vf_flr_failed(gt, vfid);
1056 else
1057 pf_enter_vf_flr_reset_data(gt, vfid);
1058 return true;
1059 }
1060
pf_enter_vf_flr_wait_guc(struct xe_gt * gt,unsigned int vfid)1061 static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1062 {
1063 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
1064 pf_enter_vf_state_machine_bug(gt, vfid);
1065 }
1066
pf_exit_vf_flr_wait_guc(struct xe_gt * gt,unsigned int vfid)1067 static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
1068 {
1069 return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
1070 }
1071
pf_exit_vf_flr_send_start(struct xe_gt * gt,unsigned int vfid)1072 static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
1073 {
1074 int err;
1075
1076 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
1077 return false;
1078
1079 /* GuC may actually send a FLR_DONE before we get a RESPONSE */
1080 pf_enter_vf_flr_wait_guc(gt, vfid);
1081
1082 err = pf_send_vf_flr_start(gt, vfid);
1083 if (err) {
1084 /* send failed, so we shouldn't expect FLR_DONE from GuC */
1085 pf_exit_vf_flr_wait_guc(gt, vfid);
1086
1087 if (err == -EBUSY)
1088 pf_enter_vf_flr_send_start(gt, vfid);
1089 else if (err == -EIO)
1090 pf_enter_vf_flr_rejected(gt, vfid);
1091 else
1092 pf_enter_vf_flr_failed(gt, vfid);
1093 } else {
1094 /*
1095 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
1096 * but since GuC didn't complain, we may clear MISMATCH
1097 */
1098 pf_exit_vf_mismatch(gt, vfid);
1099 }
1100
1101 return true;
1102 }
1103
pf_exit_vf_flr_guc_done(struct xe_gt * gt,unsigned int vfid)1104 static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1105 {
1106 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1107 return false;
1108
1109 pf_enter_vf_flr_reset_config(gt, vfid);
1110 return true;
1111 }
1112
pf_enter_vf_flr_guc_done(struct xe_gt * gt,unsigned int vfid)1113 static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
1114 {
1115 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
1116 pf_queue_vf(gt, vfid);
1117 }
1118
1119 /**
1120 * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
1121 * @gt: the &xe_gt
1122 * @vfid: the VF identifier
1123 *
1124 * This function is for PF only.
1125 *
1126 * Return: 0 on success or a negative error code on failure.
1127 */
xe_gt_sriov_pf_control_trigger_flr(struct xe_gt * gt,unsigned int vfid)1128 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
1129 {
1130 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
1131 int err;
1132
1133 pf_enter_vf_flr_wip(gt, vfid);
1134
1135 err = pf_wait_vf_wip_done(gt, vfid, timeout);
1136 if (err) {
1137 xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
1138 vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
1139 return err;
1140 }
1141
1142 if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
1143 return -EIO;
1144
1145 return 0;
1146 }
1147
1148 /**
1149 * DOC: The VF FLR Flow with GuC
1150 *
1151 * The VF FLR flow includes several steps::
1152 *
1153 * PF GUC PCI
1154 * ========================================================
1155 * | | |
1156 * (1) | [ ] <----- FLR --|
1157 * | [ ] :
1158 * (2) [ ] <-------- NOTIFY FLR --[ ]
1159 * [ ] |
1160 * (3) [ ] |
1161 * [ ] |
1162 * [ ]-- START FLR ---------> [ ]
1163 * | [ ]
1164 * (4) | [ ]
1165 * | [ ]
1166 * [ ] <--------- FLR DONE -- [ ]
1167 * [ ] |
1168 * (5) [ ] |
1169 * [ ] |
1170 * [ ]-- FINISH FLR --------> [ ]
1171 * | |
1172 *
1173 * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
1174 * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
1175 * * Step 2a: on some platforms G2H is only received from root GuC
1176 * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
1177 * * Step 3a: on some platforms PF must send H2G to all other GuCs
1178 * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
1179 * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
1180 */
1181
needs_dispatch_flr(struct xe_device * xe)1182 static bool needs_dispatch_flr(struct xe_device *xe)
1183 {
1184 return xe->info.platform == XE_PVC;
1185 }
1186
/*
 * Handle the VF FLR notification: start the FLR sequence for the VF on
 * this GT only or, on platforms that need dispatching, on every GT of
 * the device.
 */
static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_gt *gtit;
	unsigned int gtid;

	xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);

	if (!needs_dispatch_flr(xe)) {
		pf_enter_vf_flr_wip(gt, vfid);
		return;
	}

	for_each_gt(gtit, xe, gtid)
		pf_enter_vf_flr_wip(gtit, vfid);
}
1202
/*
 * Handle the VF FLR_DONE notification. If the VF was waiting for it
 * (FLR_WAIT_GUC), move on to FLR_GUC_DONE; otherwise the notification
 * came out of order and the VF is flagged as MISMATCH.
 */
static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
{
	if (pf_exit_vf_flr_wait_guc(gt, vfid)) {
		pf_enter_vf_flr_guc_done(gt, vfid);
		return;
	}

	xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
	pf_enter_vf_mismatch(gt, vfid);
}
1213
/*
 * Handle the VF PAUSE_DONE notification. If the VF was waiting for it,
 * move on to the pause GUC_DONE step; otherwise the notification came
 * out of order and the VF is flagged as MISMATCH.
 */
static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
{
	if (pf_exit_pause_wait_guc(gt, vfid)) {
		pf_enter_vf_pause_guc_done(gt, vfid);
		return;
	}

	xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
	pf_enter_vf_mismatch(gt, vfid);
}
1224
/*
 * Dispatch a per-VF state notification to its handler.
 *
 * Return: 0 if the event was recognized (FIXUP_DONE is accepted but
 *         ignored), -EPROTO if @vfid is above the number of total VFs,
 *         -ENOPKG for an unknown event.
 */
static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
{
	int ret = 0;

	xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);

	if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
		return -EPROTO;

	switch (eventid) {
	case GUC_PF_NOTIFY_VF_FLR:
		pf_handle_vf_flr(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_FLR_DONE:
		pf_handle_vf_flr_done(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_PAUSE_DONE:
		pf_handle_vf_pause_done(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_FIXUP_DONE:
		/* nothing to do */
		break;
	default:
		ret = -ENOPKG;
		break;
	}

	return ret;
}
1249
/*
 * Handle a state notification that carries no VF identifier.
 * Only the VF_ENABLE event is known and it is just logged.
 *
 * Return: 0 for a known event, -ENOPKG otherwise.
 */
static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
{
	if (eventid != GUC_PF_NOTIFY_VF_ENABLE)
		return -ENOPKG;

	xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
				str_enabled_disabled(true),
				str_enabled_disabled(false));
	return 0;
}
1263
1264 /**
1265 * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
1266 * @gt: the &xe_gt
1267 * @msg: the G2H message
1268 * @len: the length of the G2H message
1269 *
1270 * This function is for PF only.
1271 *
1272 * Return: 0 on success or a negative error code on failure.
1273 */
xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt * gt,const u32 * msg,u32 len)1274 int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
1275 {
1276 u32 vfid;
1277 u32 eventid;
1278
1279 xe_gt_assert(gt, len);
1280 xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
1281 xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
1282 xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
1283 GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);
1284
1285 if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
1286 return -EPROTO;
1287
1288 if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
1289 return -EPFNOSUPPORT;
1290
1291 if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
1292 return -EPROTO;
1293
1294 vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
1295 eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);
1296
1297 return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
1298 }
1299
pf_process_vf_state_machine(struct xe_gt * gt,unsigned int vfid)1300 static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
1301 {
1302 if (pf_exit_vf_flr_send_start(gt, vfid))
1303 return true;
1304
1305 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
1306 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1307 control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
1308 return false;
1309 }
1310
1311 if (pf_exit_vf_flr_guc_done(gt, vfid))
1312 return true;
1313
1314 if (pf_exit_vf_flr_reset_config(gt, vfid))
1315 return true;
1316
1317 if (pf_exit_vf_flr_reset_data(gt, vfid))
1318 return true;
1319
1320 if (pf_exit_vf_flr_reset_mmio(gt, vfid))
1321 return true;
1322
1323 if (pf_exit_vf_flr_send_finish(gt, vfid))
1324 return true;
1325
1326 if (pf_exit_vf_stop_send_stop(gt, vfid))
1327 return true;
1328
1329 if (pf_exit_vf_pause_send_pause(gt, vfid))
1330 return true;
1331
1332 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
1333 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
1334 control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
1335 return true;
1336 }
1337
1338 if (pf_exit_vf_pause_guc_done(gt, vfid))
1339 return true;
1340
1341 if (pf_exit_vf_resume_send_resume(gt, vfid))
1342 return true;
1343
1344 return false;
1345 }
1346
pf_control_state_index(struct xe_gt * gt,struct xe_gt_sriov_control_state * cs)1347 static unsigned int pf_control_state_index(struct xe_gt *gt,
1348 struct xe_gt_sriov_control_state *cs)
1349 {
1350 return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
1351 }
1352
pf_worker_find_work(struct xe_gt * gt)1353 static void pf_worker_find_work(struct xe_gt *gt)
1354 {
1355 struct xe_gt_sriov_pf_control *pfc = >->sriov.pf.control;
1356 struct xe_gt_sriov_control_state *cs;
1357 unsigned int vfid;
1358 bool empty;
1359 bool more;
1360
1361 spin_lock(&pfc->lock);
1362 cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
1363 if (cs)
1364 list_del_init(&cs->link);
1365 empty = list_empty(&pfc->list);
1366 spin_unlock(&pfc->lock);
1367
1368 if (!cs)
1369 return;
1370
1371 /* VF metadata structures are indexed by the VFID */
1372 vfid = pf_control_state_index(gt, cs);
1373 xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));
1374
1375 more = pf_process_vf_state_machine(gt, vfid);
1376 if (more)
1377 pf_queue_vf(gt, vfid);
1378 else if (!empty)
1379 pf_queue_control_worker(gt);
1380 }
1381
control_worker_func(struct work_struct * w)1382 static void control_worker_func(struct work_struct *w)
1383 {
1384 struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);
1385
1386 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1387 pf_worker_find_work(gt);
1388 }
1389
pf_stop_worker(struct xe_gt * gt)1390 static void pf_stop_worker(struct xe_gt *gt)
1391 {
1392 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
1393 cancel_work_sync(>->sriov.pf.control.worker);
1394 }
1395
/*
 * drm-managed release action registered by xe_gt_sriov_pf_control_init();
 * @data is the &xe_gt whose control worker has to be stopped.
 */
static void control_fini_action(struct drm_device *dev, void *data)
{
	pf_stop_worker(data);
}
1402
1403 /**
1404 * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
1405 * @gt: the &xe_gt
1406 *
1407 * This function is for PF only.
1408 *
1409 * Return: 0 on success or a negative error code on failure.
1410 */
xe_gt_sriov_pf_control_init(struct xe_gt * gt)1411 int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
1412 {
1413 struct xe_device *xe = gt_to_xe(gt);
1414 unsigned int n, totalvfs;
1415
1416 xe_gt_assert(gt, IS_SRIOV_PF(xe));
1417
1418 totalvfs = xe_sriov_pf_get_totalvfs(xe);
1419 for (n = 0; n <= totalvfs; n++) {
1420 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);
1421
1422 init_completion(&cs->done);
1423 INIT_LIST_HEAD(&cs->link);
1424 }
1425
1426 spin_lock_init(>->sriov.pf.control.lock);
1427 INIT_LIST_HEAD(>->sriov.pf.control.list);
1428 INIT_WORK(>->sriov.pf.control.worker, control_worker_func);
1429
1430 return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
1431 }
1432
/**
 * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
 * @gt: the &xe_gt
 *
 * Any per-VF status maintained by the PF or any ongoing VF control activity
 * performed by the PF must be reset or cancelled when the GT is reset.
 * The control worker is stopped first, then every VF (1..totalvfs) is
 * passed to pf_enter_vf_ready().
 *
 * This function is for PF only.
 */
void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int totalvfs;
	unsigned int vfid;

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	pf_stop_worker(gt);

	totalvfs = xe_sriov_pf_get_totalvfs(xe);

	/* VFID 0 is the PF itself, so start from the first VF */
	for (vfid = 1; vfid <= totalvfs; vfid++)
		pf_enter_vf_ready(gt, vfid);
}
1455