// SPDX-License-Identifier: GPL-2.0+
/* Copyright (c) 2016-2017 Hisilicon Limited. */

#include <linux/sched/clock.h>

#include "hclge_err.h"

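/* Each table below maps a bit in a hardware error status register to a
 * human-readable message and the reset level required to recover from the
 * error.  Every table is terminated by an all-zero sentinel entry; the
 * NULL .msg of that sentinel ends the walk in hclge_log_error().
 */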
static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
	{
		.int_msk = BIT(1),
		.msg = "imp_itcm0_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "imp_itcm1_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "imp_itcm2_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "imp_itcm3_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(9),
		.msg = "imp_dtcm0_mem0_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(11),
		.msg = "imp_dtcm0_mem1_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(13),
		.msg = "imp_dtcm1_mem0_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(15),
		.msg = "imp_dtcm1_mem1_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(17),
		.msg = "imp_itcm4_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
	{
		.int_msk = BIT(1),
		.msg = "cmdq_nic_rx_depth_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "cmdq_nic_tx_depth_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "cmdq_nic_rx_tail_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "cmdq_nic_tx_tail_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(9),
		.msg = "cmdq_nic_rx_head_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(11),
		.msg = "cmdq_nic_tx_head_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(13),
		.msg = "cmdq_nic_rx_addr_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(15),
		.msg = "cmdq_nic_tx_addr_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(17),
		.msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(19),
		.msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(21),
		.msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(23),
		.msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(25),
		.msg = "cmdq_rocee_rx_head_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(27),
		.msg = "cmdq_rocee_tx_head_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(29),
		.msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(31),
		.msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
	{
		.int_msk = BIT(6),
		.msg = "tqp_int_cfg_even_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "tqp_int_cfg_odd_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(8),
		.msg = "tqp_int_ctrl_even_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(9),
		.msg = "tqp_int_ctrl_odd_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(10),
		.msg = "tx_que_scan_int_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(11),
		.msg = "rx_que_scan_int_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
	{
		.int_msk = BIT(1),
		.msg = "msix_nic_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "msix_rocee_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_igu_int[] = {
	{
		.int_msk = BIT(0),
		.msg = "igu_rx_buf0_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "igu_rx_buf1_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
	{
		.int_msk = BIT(0),
		.msg = "rx_buf_overflow",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(1),
		.msg = "rx_stp_fifo_overflow",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "rx_stp_fifo_underflow",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "tx_buf_overflow",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(4),
		.msg = "tx_buf_underrun",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "rx_stp_buf_overflow",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_ncsi_err_int[] = {
	{
		.int_msk = BIT(1),
		.msg = "ncsi_tx_ecc_mbit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
	{
		.int_msk = BIT(0),
		.msg = "vf_vlan_ad_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(1),
		.msg = "umv_mcast_group_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "umv_key_mem0_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "umv_key_mem1_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(4),
		.msg = "umv_key_mem2_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "umv_key_mem3_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(6),
		.msg = "umv_ad_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "rss_tc_mode_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(8),
		.msg = "rss_idt_mem0_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(9),
		.msg = "rss_idt_mem1_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(10),
		.msg = "rss_idt_mem2_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(11),
		.msg = "rss_idt_mem3_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(12),
		.msg = "rss_idt_mem4_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(13),
		.msg = "rss_idt_mem5_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(14),
		.msg = "rss_idt_mem6_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(15),
		.msg = "rss_idt_mem7_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(16),
		.msg = "rss_idt_mem8_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(17),
		.msg = "rss_idt_mem9_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(18),
		.msg = "rss_idt_mem10_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(19),
		.msg = "rss_idt_mem11_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(20),
		.msg = "rss_idt_mem12_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(21),
		.msg = "rss_idt_mem13_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(22),
		.msg = "rss_idt_mem14_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(23),
		.msg = "rss_idt_mem15_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(24),
		.msg = "port_vlan_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(25),
		.msg = "mcast_linear_table_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(26),
		.msg = "mcast_result_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(27),
		.msg = "flow_director_ad_mem0_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(28),
		.msg = "flow_director_ad_mem1_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(29),
		.msg = "rx_vlan_tag_memory_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(30),
		.msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = {
	{
		.int_msk = BIT(0),
		.msg = "tx_vlan_tag_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(1),
		.msg = "rss_list_tc_unassigned_queue_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
	{
		.int_msk = BIT(0),
		.msg = "hfs_fifo_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(1),
		.msg = "rslt_descr_fifo_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "tx_vlan_tag_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "FD_CN0_memory_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(4),
		.msg = "FD_CN1_memory_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "GRO_AD_memory_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_tm_sch_rint[] = {
	{
		.int_msk = BIT(1),
		.msg = "tm_sch_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "tm_sch_port_shap_sub_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "tm_sch_port_shap_sub_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(4),
		.msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(6),
		.msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(8),
		.msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(9),
		.msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(10),
		.msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(11),
		.msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(12),
		.msg = "tm_sch_port_shap_offset_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(13),
		.msg = "tm_sch_port_shap_offset_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(14),
		.msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(15),
		.msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(16),
		.msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(17),
		.msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(18),
		.msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(19),
		.msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(20),
		.msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(21),
		.msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(22),
		.msg = "tm_sch_rq_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(23),
		.msg = "tm_sch_rq_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(24),
		.msg = "tm_sch_nq_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(25),
		.msg = "tm_sch_nq_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(26),
		.msg = "tm_sch_roce_up_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(27),
		.msg = "tm_sch_roce_up_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(28),
		.msg = "tm_sch_rcb_byte_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(29),
		.msg = "tm_sch_rcb_byte_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(30),
		.msg = "tm_sch_ssu_byte_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(31),
		.msg = "tm_sch_ssu_byte_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
	{
		.int_msk = BIT(0),
		.msg = "qcn_shap_gp0_sch_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(1),
		.msg = "qcn_shap_gp0_sch_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "qcn_shap_gp1_sch_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "qcn_shap_gp1_sch_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(4),
		.msg = "qcn_shap_gp2_sch_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "qcn_shap_gp2_sch_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(6),
		.msg = "qcn_shap_gp3_sch_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "qcn_shap_gp3_sch_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(8),
		.msg = "qcn_shap_gp0_offset_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(9),
		.msg = "qcn_shap_gp0_offset_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(10),
		.msg = "qcn_shap_gp1_offset_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(11),
		.msg = "qcn_shap_gp1_offset_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(12),
		.msg = "qcn_shap_gp2_offset_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(13),
		.msg = "qcn_shap_gp2_offset_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(14),
		.msg = "qcn_shap_gp3_offset_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(15),
		.msg = "qcn_shap_gp3_offset_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(16),
		.msg = "qcn_byte_info_fifo_rd_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(17),
		.msg = "qcn_byte_info_fifo_wr_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_qcn_ecc_rint[] = {
	{
		.int_msk = BIT(1),
		.msg = "qcn_byte_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "qcn_time_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "qcn_fb_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "qcn_link_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(9),
		.msg = "qcn_rate_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(11),
		.msg = "qcn_tmplt_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(13),
		.msg = "qcn_shap_cfg_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(15),
		.msg = "qcn_gp0_barrel_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(17),
		.msg = "qcn_gp1_barrel_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(19),
		.msg = "qcn_gp2_barrel_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(21),
643 .msg = "qcn_gp3_barral_mem_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = {
	{
		.int_msk = BIT(0),
		.msg = "egu_cge_afifo_ecc_1bit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(1),
		.msg = "egu_cge_afifo_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "egu_lge_afifo_ecc_1bit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "egu_lge_afifo_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(4),
		.msg = "cge_igu_afifo_ecc_1bit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "cge_igu_afifo_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(6),
		.msg = "lge_igu_afifo_ecc_1bit_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "lge_igu_afifo_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(8),
		.msg = "cge_igu_afifo_overflow_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(9),
		.msg = "lge_igu_afifo_overflow_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(10),
		.msg = "egu_cge_afifo_underrun_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(11),
		.msg = "egu_lge_afifo_underrun_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(12),
		.msg = "egu_ge_afifo_underrun_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(13),
		.msg = "ge_igu_afifo_overflow_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = {
	{
		.int_msk = BIT(13),
		.msg = "rpu_rx_pkt_bit32_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(14),
		.msg = "rpu_rx_pkt_bit33_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(15),
		.msg = "rpu_rx_pkt_bit34_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(16),
		.msg = "rpu_rx_pkt_bit35_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(17),
		.msg = "rcb_tx_ring_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(18),
		.msg = "rcb_rx_ring_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(19),
		.msg = "rcb_tx_fbd_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(20),
		.msg = "rcb_rx_ebd_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(21),
		.msg = "rcb_tso_info_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(22),
		.msg = "rcb_tx_int_info_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(23),
		.msg = "rcb_rx_int_info_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(24),
		.msg = "tpu_tx_pkt_0_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(25),
		.msg = "tpu_tx_pkt_1_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(26),
		.msg = "rd_bus_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(27),
		.msg = "wr_bus_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(28),
		.msg = "reg_search_miss",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(29),
		.msg = "rx_q_search_miss",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(30),
		.msg = "ooo_ecc_err_detect",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(31),
		.msg = "ooo_ecc_err_multpl",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = {
	{
		.int_msk = BIT(4),
		.msg = "gro_bd_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "gro_context_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(6),
		.msg = "rx_stash_cfg_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "axi_rd_fbd_ecc_mbit_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = {
	{
		.int_msk = BIT(0),
		.msg = "over_8bd_no_fe",
		.reset_level = HNAE3_FUNC_RESET
	}, {
		.int_msk = BIT(1),
		.msg = "tso_mss_cmp_min_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "tso_mss_cmp_max_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "tx_rd_fbd_poison",
		.reset_level = HNAE3_FUNC_RESET
	}, {
		.int_msk = BIT(4),
		.msg = "rx_rd_ebd_poison",
		.reset_level = HNAE3_FUNC_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "buf_wait_timeout",
		.reset_level = HNAE3_NONE_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_ssu_com_err_int[] = {
	{
		.int_msk = BIT(0),
		.msg = "buf_sum_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(1),
		.msg = "ppp_mb_num_err",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "ppp_mbid_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "ppp_rlt_mac_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(4),
		.msg = "ppp_rlt_host_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "cks_edit_position_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(6),
		.msg = "cks_edit_condition_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "vlan_edit_condition_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(8),
		.msg = "vlan_num_ot_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(9),
		.msg = "vlan_num_in_err",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

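/* Build one table entry for an SSU memory ECC multi-bit error; the
 * preprocessor stringizing operator (#x) folds the bit number into the
 * message, e.g. HCLGE_SSU_MEM_ECC_ERR(3) expands to
 * { BIT(3), "ssu_mem3_ecc_mbit_err", HNAE3_GLOBAL_RESET }.
 */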
#define HCLGE_SSU_MEM_ECC_ERR(x) \
{ \
	.int_msk = BIT(x), \
	.msg = "ssu_mem" #x "_ecc_mbit_err", \
	.reset_level = HNAE3_GLOBAL_RESET \
}

static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
	HCLGE_SSU_MEM_ECC_ERR(0),
	HCLGE_SSU_MEM_ECC_ERR(1),
	HCLGE_SSU_MEM_ECC_ERR(2),
	HCLGE_SSU_MEM_ECC_ERR(3),
	HCLGE_SSU_MEM_ECC_ERR(4),
	HCLGE_SSU_MEM_ECC_ERR(5),
	HCLGE_SSU_MEM_ECC_ERR(6),
	HCLGE_SSU_MEM_ECC_ERR(7),
	HCLGE_SSU_MEM_ECC_ERR(8),
	HCLGE_SSU_MEM_ECC_ERR(9),
	HCLGE_SSU_MEM_ECC_ERR(10),
	HCLGE_SSU_MEM_ECC_ERR(11),
	HCLGE_SSU_MEM_ECC_ERR(12),
	HCLGE_SSU_MEM_ECC_ERR(13),
	HCLGE_SSU_MEM_ECC_ERR(14),
	HCLGE_SSU_MEM_ECC_ERR(15),
	HCLGE_SSU_MEM_ECC_ERR(16),
	HCLGE_SSU_MEM_ECC_ERR(17),
	HCLGE_SSU_MEM_ECC_ERR(18),
	HCLGE_SSU_MEM_ECC_ERR(19),
	HCLGE_SSU_MEM_ECC_ERR(20),
	HCLGE_SSU_MEM_ECC_ERR(21),
	HCLGE_SSU_MEM_ECC_ERR(22),
	HCLGE_SSU_MEM_ECC_ERR(23),
	HCLGE_SSU_MEM_ECC_ERR(24),
	HCLGE_SSU_MEM_ECC_ERR(25),
	HCLGE_SSU_MEM_ECC_ERR(26),
	HCLGE_SSU_MEM_ECC_ERR(27),
	HCLGE_SSU_MEM_ECC_ERR(28),
	HCLGE_SSU_MEM_ECC_ERR(29),
	HCLGE_SSU_MEM_ECC_ERR(30),
	HCLGE_SSU_MEM_ECC_ERR(31),
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = {
	{
		.int_msk = BIT(0),
		.msg = "roc_pkt_without_key_port",
		.reset_level = HNAE3_FUNC_RESET
	}, {
		.int_msk = BIT(1),
		.msg = "tpu_pkt_without_key_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "igu_pkt_without_key_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "roc_eof_mis_match_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(4),
		.msg = "tpu_eof_mis_match_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "igu_eof_mis_match_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(6),
		.msg = "roc_sof_mis_match_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "tpu_sof_mis_match_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(8),
		.msg = "igu_sof_mis_match_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(11),
		.msg = "ets_rd_int_rx_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(12),
		.msg = "ets_wr_int_rx_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(13),
		.msg = "ets_rd_int_tx_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(14),
		.msg = "ets_wr_int_tx_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = {
	{
		.int_msk = BIT(0),
		.msg = "ig_mac_inf_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(1),
		.msg = "ig_host_inf_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "ig_roc_buf_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "ig_host_data_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(4),
		.msg = "ig_host_key_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(5),
		.msg = "tx_qcn_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(6),
		.msg = "rx_qcn_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(7),
		.msg = "tx_pf_rd_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(8),
		.msg = "rx_pf_rd_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(9),
		.msg = "qm_eof_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(10),
		.msg = "mb_rlt_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(11),
		.msg = "dup_uncopy_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(12),
		.msg = "dup_cnt_rd_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(13),
		.msg = "dup_cnt_drop_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(14),
		.msg = "dup_cnt_wrb_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(15),
		.msg = "host_cmd_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(16),
		.msg = "mac_cmd_fifo_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(17),
		.msg = "host_cmd_bitmap_empty_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(18),
		.msg = "mac_cmd_bitmap_empty_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(19),
		.msg = "dup_bitmap_empty_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(20),
		.msg = "out_queue_bitmap_empty_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(21),
		.msg = "bank2_bitmap_empty_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(22),
		.msg = "bank1_bitmap_empty_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(23),
		.msg = "bank0_bitmap_empty_int",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = {
	{
		.int_msk = BIT(0),
		.msg = "ets_rd_int_rx_tcg",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(1),
		.msg = "ets_wr_int_rx_tcg",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(2),
		.msg = "ets_rd_int_tx_tcg",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		.int_msk = BIT(3),
		.msg = "ets_wr_int_tx_tcg",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = {
	{
		.int_msk = BIT(0),
		.msg = "roc_pkt_without_key_port",
		.reset_level = HNAE3_FUNC_RESET
	}, {
		.int_msk = BIT(9),
		.msg = "low_water_line_err_port",
		.reset_level = HNAE3_NONE_RESET
	}, {
		.int_msk = BIT(10),
		.msg = "hi_water_line_err_port",
		.reset_level = HNAE3_GLOBAL_RESET
	}, {
		/* sentinel */
	}
};

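/* Note: unlike the bitmask tables above, .int_msk here appears to hold an
 * error-type value rather than a single-bit mask (0x11, 0x12, 0x13, ...
 * are not powers of two), so entries are matched against the masked
 * overflow status by value.
 */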
static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
	{
		.int_msk = 0,
		.msg = "rocee qmm ovf: sgid invalid err"
	}, {
		.int_msk = 0x4,
		.msg = "rocee qmm ovf: sgid ovf err"
	}, {
		.int_msk = 0x8,
		.msg = "rocee qmm ovf: smac invalid err"
	}, {
		.int_msk = 0xC,
		.msg = "rocee qmm ovf: smac ovf err"
	}, {
		.int_msk = 0x10,
		.msg = "rocee qmm ovf: cqc invalid err"
	}, {
		.int_msk = 0x11,
		.msg = "rocee qmm ovf: cqc ovf err"
	}, {
		.int_msk = 0x12,
		.msg = "rocee qmm ovf: cqc hopnum err"
	}, {
		.int_msk = 0x13,
		.msg = "rocee qmm ovf: cqc ba0 err"
	}, {
		.int_msk = 0x14,
		.msg = "rocee qmm ovf: srqc invalid err"
	}, {
		.int_msk = 0x15,
		.msg = "rocee qmm ovf: srqc ovf err"
	}, {
		.int_msk = 0x16,
		.msg = "rocee qmm ovf: srqc hopnum err"
	}, {
		.int_msk = 0x17,
		.msg = "rocee qmm ovf: srqc ba0 err"
	}, {
		.int_msk = 0x18,
		.msg = "rocee qmm ovf: mpt invalid err"
	}, {
		.int_msk = 0x19,
		.msg = "rocee qmm ovf: mpt ovf err"
	}, {
		.int_msk = 0x1A,
		.msg = "rocee qmm ovf: mpt hopnum err"
	}, {
		.int_msk = 0x1B,
		.msg = "rocee qmm ovf: mpt ba0 err"
	}, {
		.int_msk = 0x1C,
		.msg = "rocee qmm ovf: qpc invalid err"
	}, {
		.int_msk = 0x1D,
		.msg = "rocee qmm ovf: qpc ovf err"
	}, {
		.int_msk = 0x1E,
		.msg = "rocee qmm ovf: qpc hopnum err"
	}, {
		.int_msk = 0x1F,
		.msg = "rocee qmm ovf: qpc ba0 err"
	}, {
		/* sentinel */
	}
};

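/* Each hclge_mod_reg_info entry names a register group and lists the word
 * offsets of its values within the returned descriptor chain; the offsets
 * index the concatenated desc[].data[] words, as decoded in
 * hclge_print_mod_reg_info().  Pairs such as {31, 30} apparently list the
 * high word of a counter first, matching the order in which the words are
 * printed.
 */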
static const struct hclge_mod_reg_info hclge_ssu_reg_0_info[] = {
	{
		.reg_name = "SSU_BP_STATUS_0~5",
		.reg_offset_group = {5, 6, 7, 8, 9, 10},
		.group_size = 6
	}, {
		.reg_name = "LO_PRI_UNICAST_CUR_CNT",
		.reg_offset_group = {54},
		.group_size = 1
	}, {
		.reg_name = "HI/LO_PRI_MULTICAST_CUR_CNT",
		.reg_offset_group = {55, 56},
		.group_size = 2
	}, {
		.reg_name = "SSU_MB_RD_RLT_DROP_CNT",
		.reg_offset_group = {29},
		.group_size = 1
	}, {
		.reg_name = "SSU_PPP_MAC_KEY_NUM",
		.reg_offset_group = {31, 30},
		.group_size = 2
	}, {
		.reg_name = "SSU_PPP_HOST_KEY_NUM",
		.reg_offset_group = {33, 32},
		.group_size = 2
	}, {
		.reg_name = "PPP_SSU_MAC/HOST_RLT_NUM",
		.reg_offset_group = {35, 34, 37, 36},
		.group_size = 4
	}, {
		.reg_name = "FULL/PART_DROP_NUM",
		.reg_offset_group = {18, 19},
		.group_size = 2
	}, {
		.reg_name = "PPP_KEY/RLT_DROP_NUM",
		.reg_offset_group = {20, 21},
		.group_size = 2
	}, {
		.reg_name = "NIC/ROC_L2_ERR_DROP_PKT_CNT",
		.reg_offset_group = {48, 49},
		.group_size = 2
	}, {
		.reg_name = "NIC/ROC_L2_ERR_DROP_PKT_CNT_RX",
		.reg_offset_group = {50, 51},
		.group_size = 2
	},
};

static const struct hclge_mod_reg_info hclge_ssu_reg_1_info[] = {
	{
		.reg_name = "RX_PACKET_IN/OUT_CNT",
		.reg_offset_group = {13, 12, 15, 14},
		.group_size = 4
	}, {
		.reg_name = "TX_PACKET_IN/OUT_CNT",
		.reg_offset_group = {17, 16, 19, 18},
		.group_size = 4
	}, {
		.reg_name = "RX_PACKET_TC0_IN/OUT_CNT",
		.reg_offset_group = {25, 24, 41, 40},
		.group_size = 4
	}, {
		.reg_name = "RX_PACKET_TC1_IN/OUT_CNT",
		.reg_offset_group = {27, 26, 43, 42},
		.group_size = 4
	}, {
		.reg_name = "RX_PACKET_TC2_IN/OUT_CNT",
		.reg_offset_group = {29, 28, 45, 44},
		.group_size = 4
	}, {
		.reg_name = "RX_PACKET_TC3_IN/OUT_CNT",
		.reg_offset_group = {31, 30, 47, 46},
		.group_size = 4
	}, {
		.reg_name = "RX_PACKET_TC4_IN/OUT_CNT",
		.reg_offset_group = {33, 32, 49, 48},
		.group_size = 4
	}, {
		.reg_name = "RX_PACKET_TC5_IN/OUT_CNT",
		.reg_offset_group = {35, 34, 51, 50},
		.group_size = 4
	}, {
		.reg_name = "RX_PACKET_TC6_IN/OUT_CNT",
		.reg_offset_group = {37, 36, 53, 52},
		.group_size = 4
	}, {
		.reg_name = "RX_PACKET_TC7_IN/OUT_CNT",
		.reg_offset_group = {39, 38, 55, 54},
		.group_size = 4
	}, {
		.reg_name = "TX_PACKET_TC0_IN/OUT_CNT",
		.reg_offset_group = {57, 56, 73, 72},
		.group_size = 4
	}, {
		.reg_name = "TX_PACKET_TC1_IN/OUT_CNT",
		.reg_offset_group = {59, 58, 75, 74},
		.group_size = 4
	}, {
		.reg_name = "TX_PACKET_TC2_IN/OUT_CNT",
		.reg_offset_group = {61, 60, 77, 76},
		.group_size = 4
	}, {
		.reg_name = "TX_PACKET_TC3_IN/OUT_CNT",
		.reg_offset_group = {63, 62, 79, 78},
		.group_size = 4
	}, {
		.reg_name = "TX_PACKET_TC4_IN/OUT_CNT",
		.reg_offset_group = {65, 64, 81, 80},
		.group_size = 4
	}, {
		.reg_name = "TX_PACKET_TC5_IN/OUT_CNT",
		.reg_offset_group = {67, 66, 83, 82},
		.group_size = 4
	}, {
		.reg_name = "TX_PACKET_TC6_IN/OUT_CNT",
		.reg_offset_group = {69, 68, 85, 84},
		.group_size = 4
	}, {
		.reg_name = "TX_PACKET_TC7_IN/OUT_CNT",
		.reg_offset_group = {71, 70, 87, 86},
		.group_size = 4
	}, {
		.reg_name = "PACKET_TC0~3_CURR_BUFFER_CNT",
		.reg_offset_group = {1, 2, 3, 4},
		.group_size = 4
	}, {
		.reg_name = "PACKET_TC4~7_CURR_BUFFER_CNT",
		.reg_offset_group = {5, 6, 7, 8},
		.group_size = 4
	}, {
		.reg_name = "ROC_RX_PACKET_IN_CNT",
		.reg_offset_group = {21, 20},
		.group_size = 2
	}, {
		.reg_name = "ROC_TX_PACKET_OUT_CNT",
		.reg_offset_group = {23, 22},
		.group_size = 2
	}
};

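/* When .has_suffix is set, the tunnel index echoed back in the first
 * descriptor word (desc->data[0]) is appended to the register name when it
 * is printed (see hclge_print_mod_reg_info()).
 */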
static const struct hclge_mod_reg_info hclge_rpu_reg_0_info[] = {
	{
		.reg_name = "RPU_FSM_DFX_ST0/ST1_TNL",
		.has_suffix = true,
		.reg_offset_group = {1, 2},
		.group_size = 2
	}, {
		.reg_name = "RPU_RX_PKT_DROP_CNT_TNL",
		.has_suffix = true,
		.reg_offset_group = {3},
		.group_size = 1
	}
};

static const struct hclge_mod_reg_info hclge_rpu_reg_1_info[] = {
	{
		.reg_name = "FIFO_DFX_ST0_1_2_4",
		.reg_offset_group = {1, 2, 3, 5},
		.group_size = 4
	}
};

static const struct hclge_mod_reg_info hclge_igu_egu_reg_info[] = {
	{
		.reg_name = "IGU_RX_ERR_PKT",
		.reg_offset_group = {1},
		.group_size = 1
	}, {
		.reg_name = "IGU_RX_OUT_ALL_PKT",
		.reg_offset_group = {29, 28},
		.group_size = 2
	}, {
		.reg_name = "EGU_TX_OUT_ALL_PKT",
		.reg_offset_group = {39, 38},
		.group_size = 2
	}, {
		.reg_name = "EGU_TX_ERR_PKT",
		.reg_offset_group = {5},
		.group_size = 1
	}
};

static const struct hclge_mod_reg_info hclge_gen_reg_info_tnl[] = {
	{
		.reg_name = "SSU2RPU_TNL_WR_PKT_CNT_TNL",
		.has_suffix = true,
		.reg_offset_group = {1},
		.group_size = 1
	}, {
		.reg_name = "RPU2HST_TNL_WR_PKT_CNT_TNL",
		.has_suffix = true,
		.reg_offset_group = {12},
		.group_size = 1
	}
};

static const struct hclge_mod_reg_info hclge_gen_reg_info[] = {
	{
		.reg_name = "SSU_OVERSIZE_DROP_CNT",
		.reg_offset_group = {12},
		.group_size = 1
	}, {
		.reg_name = "ROCE_RX_BYPASS_5NS_DROP_NUM",
		.reg_offset_group = {13},
		.group_size = 1
	}, {
		.reg_name = "RX_PKT_IN/OUT_ERR_CNT",
		.reg_offset_group = {15, 14, 19, 18},
		.group_size = 4
	}, {
		.reg_name = "TX_PKT_IN/OUT_ERR_CNT",
		.reg_offset_group = {17, 16, 21, 20},
		.group_size = 4
	}, {
		.reg_name = "ETS_TC_READY",
		.reg_offset_group = {22},
		.group_size = 1
	}, {
		.reg_name = "MIB_TX/RX_BAD_PKTS",
		.reg_offset_group = {19, 18, 29, 28},
		.group_size = 4
	}, {
		.reg_name = "MIB_TX/RX_GOOD_PKTS",
		.reg_offset_group = {21, 20, 31, 30},
		.group_size = 4
	}, {
		.reg_name = "MIB_TX/RX_TOTAL_PKTS",
		.reg_offset_group = {23, 22, 33, 32},
		.group_size = 4
	}, {
		.reg_name = "MIB_TX/RX_PAUSE_PKTS",
		.reg_offset_group = {25, 24, 35, 34},
		.group_size = 4
	}, {
		.reg_name = "MIB_TX_ERR_ALL_PKTS",
		.reg_offset_group = {27, 26},
		.group_size = 2
	}, {
		.reg_name = "MIB_RX_FCS_ERR_PKTS",
		.reg_offset_group = {37, 36},
		.group_size = 2
	}, {
		.reg_name = "IGU_EGU_AUTO_GATE_EN",
		.reg_offset_group = {42},
		.group_size = 1
	}, {
		.reg_name = "IGU_EGU_INT_SRC",
		.reg_offset_group = {43},
		.group_size = 1
	}, {
		.reg_name = "EGU_READY_NUM_CFG",
		.reg_offset_group = {44},
		.group_size = 1
	}, {
		.reg_name = "IGU_EGU_TNL_DFX",
		.reg_offset_group = {45},
		.group_size = 1
	}, {
		.reg_name = "TX_TNL_NOTE_PKT",
		.reg_offset_group = {46},
		.group_size = 1
	}
};

static const struct hclge_mod_reg_common_msg hclge_ssu_reg_common_msg[] = {
	{
		.cmd = HCLGE_OPC_DFX_SSU_REG_0,
		.result_regs = hclge_ssu_reg_0_info,
		.bd_num = HCLGE_BD_NUM_SSU_REG_0,
		.result_regs_size = ARRAY_SIZE(hclge_ssu_reg_0_info)
	}, {
		.cmd = HCLGE_OPC_DFX_SSU_REG_1,
		.result_regs = hclge_ssu_reg_1_info,
		.bd_num = HCLGE_BD_NUM_SSU_REG_1,
		.result_regs_size = ARRAY_SIZE(hclge_ssu_reg_1_info)
	}, {
		.cmd = HCLGE_OPC_DFX_RPU_REG_0,
		.result_regs = hclge_rpu_reg_0_info,
		.bd_num = HCLGE_BD_NUM_RPU_REG_0,
		.result_regs_size = ARRAY_SIZE(hclge_rpu_reg_0_info),
		.need_para = true
	}, {
		.cmd = HCLGE_OPC_DFX_RPU_REG_1,
		.result_regs = hclge_rpu_reg_1_info,
		.bd_num = HCLGE_BD_NUM_RPU_REG_1,
		.result_regs_size = ARRAY_SIZE(hclge_rpu_reg_1_info)
	}, {
		.cmd = HCLGE_OPC_DFX_IGU_EGU_REG,
		.result_regs = hclge_igu_egu_reg_info,
		.bd_num = HCLGE_BD_NUM_IGU_EGU_REG,
		.result_regs_size = ARRAY_SIZE(hclge_igu_egu_reg_info)
	}, {
		.cmd = HCLGE_OPC_DFX_GEN_REG,
		.result_regs = hclge_gen_reg_info_tnl,
		.bd_num = HCLGE_BD_NUM_GEN_REG,
		.result_regs_size = ARRAY_SIZE(hclge_gen_reg_info_tnl),
		.need_para = true
	}, {
		.cmd = HCLGE_OPC_DFX_GEN_REG,
		.result_regs = hclge_gen_reg_info,
		.bd_num = HCLGE_BD_NUM_GEN_REG,
		.result_regs_size = ARRAY_SIZE(hclge_gen_reg_info)
	}
};

static int
hclge_print_mod_reg_info(struct device *dev, struct hclge_desc *desc,
			 const struct hclge_mod_reg_info *reg_info, int size)
{
	int i, j, pos, actual_len;
	u8 offset, bd_idx, index;
	char *buf;

	buf = kzalloc(HCLGE_MOD_REG_INFO_LEN_MAX, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	for (i = 0; i < size; i++) {
		actual_len = strlen(reg_info[i].reg_name) +
			     HCLGE_MOD_REG_EXTRA_LEN +
			     HCLGE_MOD_REG_VALUE_LEN * reg_info[i].group_size;
		if (actual_len > HCLGE_MOD_REG_INFO_LEN_MAX) {
			dev_info(dev, "length of reg(%s) is invalid, len=%d\n",
				 reg_info[i].reg_name, actual_len);
			continue;
		}

		pos = scnprintf(buf, HCLGE_MOD_REG_INFO_LEN_MAX, "%s",
				reg_info[i].reg_name);
		if (reg_info[i].has_suffix)
			pos += scnprintf(buf + pos,
					 HCLGE_MOD_REG_INFO_LEN_MAX - pos, "%u",
					 le32_to_cpu(desc->data[0]));
		pos += scnprintf(buf + pos,
				 HCLGE_MOD_REG_INFO_LEN_MAX - pos,
				 ":");
		for (j = 0; j < reg_info[i].group_size; j++) {
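			/* Each offset addresses one u32 in the concatenated
			 * data[] arrays of the descriptor chain: bd_idx
			 * selects the descriptor, index the word within it.
			 */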
			offset = reg_info[i].reg_offset_group[j];
			index = offset % HCLGE_DESC_DATA_LEN;
			bd_idx = offset / HCLGE_DESC_DATA_LEN;
			pos += scnprintf(buf + pos,
					 HCLGE_MOD_REG_INFO_LEN_MAX - pos,
					 " %08x",
					 le32_to_cpu(desc[bd_idx].data[index]));
		}
		dev_info(dev, "%s\n", buf);
	}

	kfree(buf);
	return 0;
}

static bool hclge_err_mod_check_support_cmd(enum hclge_opcode_type opcode,
					    struct hclge_dev *hdev)
{
	if (opcode == HCLGE_OPC_DFX_GEN_REG &&
	    !hnae3_ae_dev_gen_reg_dfx_supported(hdev))
		return false;
	return true;
}

/* For each common msg, send a cmdq command to the IMP and print the
 * returned register info.  If the command takes a parameter, loop over
 * every parameter value and issue one request per value.
 */
static void
hclge_query_reg_info(struct hclge_dev *hdev,
		     struct hclge_mod_reg_common_msg *msg, u32 loop_time,
		     u32 *loop_para)
{
	int desc_len, i, ret;

	desc_len = msg->bd_num * sizeof(struct hclge_desc);
	msg->desc = kzalloc(desc_len, GFP_KERNEL);
	if (!msg->desc) {
		dev_err(&hdev->pdev->dev, "failed to query reg info, ret=%d\n",
			-ENOMEM);
		return;
	}

	for (i = 0; i < loop_time; i++) {
		ret = hclge_dbg_cmd_send(hdev, msg->desc, *loop_para,
					 msg->bd_num, msg->cmd);
		loop_para++;
		if (ret)
			continue;
		ret = hclge_print_mod_reg_info(&hdev->pdev->dev, msg->desc,
					       msg->result_regs,
					       msg->result_regs_size);
		if (ret)
			dev_err(&hdev->pdev->dev, "failed to print mod reg info, ret=%d\n",
				ret);
	}

	kfree(msg->desc);
}

static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev)
{
	u32 loop_para[HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE] = {0};
	struct hclge_mod_reg_common_msg msg;
	u8 i, j, num, loop_time;

	num = ARRAY_SIZE(hclge_ssu_reg_common_msg);
	for (i = 0; i < num; i++) {
		msg = hclge_ssu_reg_common_msg[i];
		if (!hclge_err_mod_check_support_cmd(msg.cmd, hdev))
			continue;
		loop_time = 1;
		loop_para[0] = 0;
		if (msg.need_para) {
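			/* Tunnel-indexed commands are queried once per
			 * tunnel; the parameter passed is the 1-based
			 * tunnel id.
			 */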
			loop_time = min(hdev->ae_dev->dev_specs.tnl_num,
					HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE);
			for (j = 0; j < loop_time; j++)
				loop_para[j] = j + 1;
		}
		hclge_query_reg_info(hdev, &msg, loop_time, loop_para);
	}
}

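/* Translation table from the module id reported in an error query response
 * to a printable name.  A module may additionally provide a query_reg_info
 * callback that dumps extra DFX registers for that module.
 */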
static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
	{
		.module_id = MODULE_NONE,
		.msg = "MODULE_NONE"
	}, {
		.module_id = MODULE_BIOS_COMMON,
		.msg = "MODULE_BIOS_COMMON"
	}, {
		.module_id = MODULE_GE,
		.msg = "MODULE_GE"
	}, {
		.module_id = MODULE_IGU_EGU,
		.msg = "MODULE_IGU_EGU",
		.query_reg_info = hclge_query_reg_info_of_ssu
	}, {
		.module_id = MODULE_LGE,
		.msg = "MODULE_LGE"
	}, {
		.module_id = MODULE_NCSI,
		.msg = "MODULE_NCSI"
	}, {
		.module_id = MODULE_PPP,
		.msg = "MODULE_PPP"
	}, {
		.module_id = MODULE_QCN,
		.msg = "MODULE_QCN"
	}, {
		.module_id = MODULE_RCB_RX,
		.msg = "MODULE_RCB_RX"
	}, {
		.module_id = MODULE_RTC,
		.msg = "MODULE_RTC"
	}, {
		.module_id = MODULE_SSU,
		.msg = "MODULE_SSU",
		.query_reg_info = hclge_query_reg_info_of_ssu
	}, {
		.module_id = MODULE_TM,
		.msg = "MODULE_TM"
	}, {
		.module_id = MODULE_RCB_TX,
		.msg = "MODULE_RCB_TX"
	}, {
		.module_id = MODULE_TXDMA,
		.msg = "MODULE_TXDMA"
	}, {
		.module_id = MODULE_MASTER,
		.msg = "MODULE_MASTER"
	}, {
		.module_id = MODULE_HIMAC,
		.msg = "MODULE_HIMAC"
	}, {
		.module_id = MODULE_ROCEE_TOP,
		.msg = "MODULE_ROCEE_TOP"
	}, {
		.module_id = MODULE_ROCEE_TIMER,
		.msg = "MODULE_ROCEE_TIMER"
	}, {
		.module_id = MODULE_ROCEE_MDB,
		.msg = "MODULE_ROCEE_MDB"
	}, {
		.module_id = MODULE_ROCEE_TSP,
		.msg = "MODULE_ROCEE_TSP"
	}, {
		.module_id = MODULE_ROCEE_TRP,
		.msg = "MODULE_ROCEE_TRP"
	}, {
		.module_id = MODULE_ROCEE_SCC,
		.msg = "MODULE_ROCEE_SCC"
	}, {
		.module_id = MODULE_ROCEE_CAEP,
		.msg = "MODULE_ROCEE_CAEP"
	}, {
		.module_id = MODULE_ROCEE_GEN_AC,
		.msg = "MODULE_ROCEE_GEN_AC"
	}, {
		.module_id = MODULE_ROCEE_QMM,
		.msg = "MODULE_ROCEE_QMM"
	}, {
		.module_id = MODULE_ROCEE_LSAN,
		.msg = "MODULE_ROCEE_LSAN"
	}
};

static const struct hclge_hw_type_id hclge_hw_type_id_st[] = {
	{
		.type_id = NONE_ERROR,
		.msg = "none_error"
	}, {
		.type_id = FIFO_ERROR,
		.msg = "fifo_error"
	}, {
		.type_id = MEMORY_ERROR,
		.msg = "memory_error"
	}, {
		.type_id = POISON_ERROR,
		.msg = "poison_error"
	}, {
		.type_id = MSIX_ECC_ERROR,
		.msg = "msix_ecc_error"
	}, {
		.type_id = TQP_INT_ECC_ERROR,
		.msg = "tqp_int_ecc_error"
	}, {
		.type_id = PF_ABNORMAL_INT_ERROR,
		.msg = "pf_abnormal_int_error",
		.cause_by_vf = true
	}, {
		.type_id = MPF_ABNORMAL_INT_ERROR,
		.msg = "mpf_abnormal_int_error",
		.cause_by_vf = true
	}, {
		.type_id = COMMON_ERROR,
		.msg = "common_error"
	}, {
		.type_id = PORT_ERROR,
		.msg = "port_error"
	}, {
		.type_id = ETS_ERROR,
		.msg = "ets_error"
	}, {
		.type_id = NCSI_ERROR,
		.msg = "ncsi_error"
	}, {
		.type_id = GLB_ERROR,
		.msg = "glb_error"
	}, {
		.type_id = LINK_ERROR,
		.msg = "link_error"
	}, {
		.type_id = PTP_ERROR,
		.msg = "ptp_error"
	}, {
		.type_id = ROCEE_NORMAL_ERR,
		.msg = "rocee_normal_error"
	}, {
		.type_id = ROCEE_OVF_ERR,
		.msg = "rocee_ovf_error"
	}, {
		.type_id = ROCEE_BUS_ERR,
		.msg = "rocee_bus_error"
	},
};

static void hclge_log_error(struct device *dev, char *reg,
			    const struct hclge_hw_error *err,
			    u32 err_sts, unsigned long *reset_requests)
{
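	/* Walk the table (terminated by a NULL .msg sentinel), log every
	 * asserted error and collect the required reset levels into the
	 * caller's reset_requests bitmap.
	 */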
	while (err->msg) {
		if (err->int_msk & err_sts) {
			dev_err(dev, "%s %s found [error status=0x%x]\n",
				reg, err->msg, err_sts);
			if (err->reset_level &&
			    err->reset_level != HNAE3_NONE_RESET)
				set_bit(err->reset_level, reset_requests);
		}
		err++;
	}
}

/* hclge_cmd_query_error: read the error information
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @cmd: command opcode
 * @flag: flag for extended command structure
 *
 * This function queries the error info from hw register(s) using a command.
 */
static int hclge_cmd_query_error(struct hclge_dev *hdev,
				 struct hclge_desc *desc, u32 cmd, u16 flag)
{
	struct device *dev = &hdev->pdev->dev;
	int desc_num = 1;
	int ret;

	hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
	if (flag) {
		desc[0].flag |= cpu_to_le16(flag);
		hclge_cmd_setup_basic_desc(&desc[1], cmd, true);
		desc_num = 2;
	}

	ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num);
	if (ret)
		dev_err(dev, "query error cmd failed (%d)\n", ret);

	return ret;
}

static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev)
{
	struct hclge_desc desc;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false);
	desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR);

	return hclge_cmd_send(&hdev->hw, &desc, 1);
}

static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	/* configure common error interrupts */
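	/* desc[0] carries the interrupt enable bits (left zero when
	 * disabling) and desc[1] the corresponding write-enable masks, so
	 * the masked bits are always updated with the requested values.
	 */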
1826 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false);
1827 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
1828 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false);
1829
1830 if (en) {
1831 desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN);
1832 desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
1833 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
1834 desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
1835 desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN |
1836 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN);
1837 desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
1838 }
1839
1840 desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK);
1841 desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
1842 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
1843 desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
1844 desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK |
1845 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK);
1846 desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);
1847
1848 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
1849 if (ret)
1850 dev_err(dev,
1851 "fail(%d) to configure common err interrupts\n", ret);
1852
1853 return ret;
1854 }
1855
hclge_config_ncsi_hw_err_int(struct hclge_dev * hdev,bool en)1856 static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en)
1857 {
1858 struct device *dev = &hdev->pdev->dev;
1859 struct hclge_desc desc;
1860 int ret;
1861
1862 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2)
1863 return 0;
1864
1865 /* configure NCSI error interrupts */
1866 hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false);
1867 if (en)
1868 desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN);
1869
1870 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
1871 if (ret)
1872 dev_err(dev,
1873 "fail(%d) to configure NCSI error interrupts\n", ret);
1874
1875 return ret;
1876 }
1877
hclge_config_igu_egu_hw_err_int(struct hclge_dev * hdev,bool en)1878 static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en)
1879 {
1880 struct device *dev = &hdev->pdev->dev;
1881 struct hclge_desc desc;
1882 int ret;
1883
1884 /* configure IGU,EGU error interrupts */
1885 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false);
1886 desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_TYPE);
1887 if (en)
1888 desc.data[0] |= cpu_to_le32(HCLGE_IGU_ERR_INT_EN);
1889
1890 desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK);
1891
1892 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
1893 if (ret) {
1894 dev_err(dev,
1895 "fail(%d) to configure IGU common interrupts\n", ret);
1896 return ret;
1897 }
1898
1899 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false);
1900 if (en)
1901 desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN);
1902
1903 desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK);
1904
1905 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
1906 if (ret) {
1907 dev_err(dev,
1908 "fail(%d) to configure IGU-EGU TNL interrupts\n", ret);
1909 return ret;
1910 }
1911
1912 ret = hclge_config_ncsi_hw_err_int(hdev, en);
1913
1914 return ret;
1915 }
1916
hclge_config_ppp_error_interrupt(struct hclge_dev * hdev,u32 cmd,bool en)1917 static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd,
1918 bool en)
1919 {
1920 struct device *dev = &hdev->pdev->dev;
1921 struct hclge_desc desc[2];
1922 int ret;
1923
1924 /* configure PPP error interrupts */
1925 hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
1926 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
1927 hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
1928
1929 if (cmd == HCLGE_PPP_CMD0_INT_CMD) {
1930 if (en) {
1931 desc[0].data[0] =
1932 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN);
1933 desc[0].data[1] =
1934 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN);
1935 desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN);
1936 }
1937
1938 desc[1].data[0] =
1939 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK);
1940 desc[1].data[1] =
1941 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK);
1942 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
1943 desc[1].data[2] =
1944 cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK);
1945 } else if (cmd == HCLGE_PPP_CMD1_INT_CMD) {
1946 if (en) {
1947 desc[0].data[0] =
1948 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN);
1949 desc[0].data[1] =
1950 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN);
1951 }
1952
1953 desc[1].data[0] =
1954 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK);
1955 desc[1].data[1] =
1956 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK);
1957 }
1958
1959 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
1960 if (ret)
1961 dev_err(dev, "fail(%d) to configure PPP error intr\n", ret);
1962
1963 return ret;
1964 }
1965
hclge_config_ppp_hw_err_int(struct hclge_dev * hdev,bool en)1966 static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en)
1967 {
1968 int ret;
1969
1970 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD,
1971 en);
1972 if (ret)
1973 return ret;
1974
1975 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD,
1976 en);
1977
1978 return ret;
1979 }
1980
hclge_config_tm_hw_err_int(struct hclge_dev * hdev,bool en)1981 static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en)
1982 {
1983 struct device *dev = &hdev->pdev->dev;
1984 struct hclge_desc desc;
1985 int ret;
1986
1987 /* configure TM SCH hw errors */
1988 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false);
1989 if (en)
1990 desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN);
1991
1992 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
1993 if (ret) {
1994 dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret);
1995 return ret;
1996 }
1997
1998 /* configure TM QCN hw errors */
1999 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_QCN_MEM_INT_CFG, false);
2000 desc.data[0] = cpu_to_le32(HCLGE_TM_QCN_ERR_INT_TYPE);
2001 if (en) {
2002 desc.data[0] |= cpu_to_le32(HCLGE_TM_QCN_FIFO_INT_EN);
2003 desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN);
2004 }
2005
2006 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
2007 if (ret)
2008 dev_err(dev,
2009 "fail(%d) to configure TM QCN mem errors\n", ret);
2010
2011 return ret;
2012 }
2013
hclge_config_mac_err_int(struct hclge_dev * hdev,bool en)2014 static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en)
2015 {
2016 struct device *dev = &hdev->pdev->dev;
2017 struct hclge_desc desc;
2018 int ret;
2019
2020 /* configure MAC common error interrupts */
2021 hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false);
2022 if (en)
2023 desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN);
2024
2025 desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK);
2026
2027 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
2028 if (ret)
2029 dev_err(dev,
2030 "fail(%d) to configure MAC COMMON error intr\n", ret);
2031
2032 return ret;
2033 }
2034
hclge_config_mac_tnl_int(struct hclge_dev * hdev,bool en)2035 int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en)
2036 {
2037 struct hclge_desc desc;
2038
2039 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_TNL_INT_EN, false);
2040 if (en)
2041 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN);
2042 else
2043 desc.data[0] = 0;
2044
2045 desc.data[1] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN_MASK);
2046
2047 return hclge_cmd_send(&hdev->hw, &desc, 1);
2048 }
2049
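/* Note: of the three PPU interrupt-config commands handled below, only
 * HCLGE_PPU_MPF_ECC_INT_CMD spans two descriptors (the enable bits go in
 * desc[0], the enable masks in desc[1]), which is why desc_num is bumped
 * to 2 for that case while the other commands fit in a single descriptor.
 */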
2050 static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd,
2051 bool en)
2052 {
2053 struct device *dev = &hdev->pdev->dev;
2054 struct hclge_desc desc[2];
2055 int desc_num = 1;
2056 int ret;
2057
2058 /* configure PPU error interrupts */
2059 if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) {
2060 hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
2061 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
2062 hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
2063 if (en) {
2064 desc[0].data[0] =
2065 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN);
2066 desc[0].data[1] =
2067 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN);
2068 desc[1].data[3] =
2069 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN);
2070 desc[1].data[4] =
2071 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN);
2072 }
2073
2074 desc[1].data[0] =
2075 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK);
2076 desc[1].data[1] =
2077 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK);
2078 desc[1].data[2] =
2079 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK);
2080 desc[1].data[3] |=
2081 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK);
2082 desc_num = 2;
2083 } else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) {
2084 hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
2085 if (en)
2086 desc[0].data[0] =
2087 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2);
2088
2089 desc[0].data[2] =
2090 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK);
2091 } else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) {
2092 hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
2093 if (en)
2094 desc[0].data[0] =
2095 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN);
2096
2097 desc[0].data[2] =
2098 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK);
2099 } else {
2100 dev_err(dev, "Invalid cmd to configure PPU error interrupts\n");
2101 return -EINVAL;
2102 }
2103
2104 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num);
2105
2106 return ret;
2107 }
2108
2109 static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en)
2110 {
2111 struct device *dev = &hdev->pdev->dev;
2112 int ret;
2113
2114 ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD,
2115 en);
2116 if (ret) {
2117 dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n",
2118 ret);
2119 return ret;
2120 }
2121
2122 ret = hclge_config_ppu_error_interrupts(hdev,
2123 HCLGE_PPU_MPF_OTHER_INT_CMD,
2124 en);
2125 if (ret) {
2126 dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret);
2127 return ret;
2128 }
2129
2130 ret = hclge_config_ppu_error_interrupts(hdev,
2131 HCLGE_PPU_PF_OTHER_INT_CMD, en);
2132 if (ret)
2133 dev_err(dev, "fail(%d) to configure PPU PF error interrupts\n",
2134 ret);
2135 return ret;
2136 }
2137
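/* Note: SSU errors are configured with two back-to-back two-descriptor
 * commands below, first the ECC interrupts and then the common interrupts.
 * On hardware older than V2 the common enable word is written with BIT(5)
 * cleared, which suggests that particular source is unsupported there.
 */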
2138 static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en)
2139 {
2140 struct device *dev = &hdev->pdev->dev;
2141 struct hclge_desc desc[2];
2142 int ret;
2143
2144 /* configure SSU ecc error interrupts */
2145 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false);
2146 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
2147 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false);
2148 if (en) {
2149 desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN);
2150 desc[0].data[1] =
2151 cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN);
2152 desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN);
2153 }
2154
2155 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK);
2156 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK);
2157 desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK);
2158
2159 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
2160 if (ret) {
2161 dev_err(dev,
2162 "fail(%d) to configure SSU ECC error interrupt\n", ret);
2163 return ret;
2164 }
2165
2166 /* configure SSU common error interrupts */
2167 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false);
2168 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
2169 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false);
2170
2171 if (en) {
2172 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
2173 desc[0].data[0] =
2174 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN);
2175 else
2176 desc[0].data[0] =
2177 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5));
2178 desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN);
2179 desc[0].data[2] =
2180 cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN);
2181 }
2182
2183 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK |
2184 HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK);
2185 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK);
2186
2187 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
2188 if (ret)
2189 dev_err(dev,
2190 "fail(%d) to configure SSU COMMON error intr\n", ret);
2191
2192 return ret;
2193 }
2194
2195 /* hclge_query_bd_num: query number of buffer descriptors
2196 * @hdev: pointer to struct hclge_dev
2197 * @is_ras: true for ras, false for msix
2198 * @mpf_bd_num: number of main PF interrupt buffer descriptors
2199 * @pf_bd_num: number of non-main PF interrupt buffer descriptors
2200 *
2201 * This function queries the number of mpf and pf buffer descriptors.
2202 */
2203 static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras,
2204 u32 *mpf_bd_num, u32 *pf_bd_num)
2205 {
2206 struct device *dev = &hdev->pdev->dev;
2207 u32 mpf_min_bd_num, pf_min_bd_num;
2208 enum hclge_opcode_type opcode;
2209 struct hclge_desc desc_bd;
2210 int ret;
2211
2212 if (is_ras) {
2213 opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM;
2214 mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM;
2215 pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM;
2216 } else {
2217 opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM;
2218 mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM;
2219 pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM;
2220 }
2221
2222 hclge_cmd_setup_basic_desc(&desc_bd, opcode, true);
2223 ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
2224 if (ret) {
2225 dev_err(dev, "fail(%d) to query msix int status bd num\n",
2226 ret);
2227 return ret;
2228 }
2229
2230 *mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
2231 *pf_bd_num = le32_to_cpu(desc_bd.data[1]);
2232 if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) {
2233 dev_err(dev, "Invalid bd num: mpf(%u), pf(%u)\n",
2234 *mpf_bd_num, *pf_bd_num);
2235 return -EINVAL;
2236 }
2237
2238 return 0;
2239 }
2240
2241 /* hclge_handle_mpf_ras_error: handle all main PF RAS errors
2242 * @hdev: pointer to struct hclge_dev
2243 * @desc: descriptor for describing the command
2244 * @num: number of extended command structures
2245 *
2246 * This function handles all the main PF RAS errors in the
2247 * hw registers using commands.
2248 */
2249 static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
2250 struct hclge_desc *desc,
2251 int num)
2252 {
2253 struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
2254 struct device *dev = &hdev->pdev->dev;
2255 __le32 *desc_data;
2256 u32 status;
2257 int ret;
2258
2259 /* query all main PF RAS errors */
2260 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT,
2261 true);
2262 ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
2263 if (ret) {
2264 dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret);
2265 return ret;
2266 }
2267
2268 /* log HNS common errors */
2269 status = le32_to_cpu(desc[0].data[0]);
2270 if (status)
2271 hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
2272 &hclge_imp_tcm_ecc_int[0], status,
2273 &ae_dev->hw_err_reset_req);
2274
2275 status = le32_to_cpu(desc[0].data[1]);
2276 if (status)
2277 hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
2278 &hclge_cmdq_nic_mem_ecc_int[0], status,
2279 &ae_dev->hw_err_reset_req);
2280
2281 if ((le32_to_cpu(desc[0].data[2])) & BIT(0))
2282 dev_warn(dev, "imp_rd_data_poison_err found\n");
2283
2284 status = le32_to_cpu(desc[0].data[3]);
2285 if (status)
2286 hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
2287 &hclge_tqp_int_ecc_int[0], status,
2288 &ae_dev->hw_err_reset_req);
2289
2290 status = le32_to_cpu(desc[0].data[4]);
2291 if (status)
2292 hclge_log_error(dev, "MSIX_ECC_INT_STS",
2293 &hclge_msix_sram_ecc_int[0], status,
2294 &ae_dev->hw_err_reset_req);
2295
2296 /* log SSU(Storage Switch Unit) errors */
2297 desc_data = (__le32 *)&desc[2];
2298 status = le32_to_cpu(*(desc_data + 2));
2299 if (status)
2300 hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
2301 &hclge_ssu_mem_ecc_err_int[0], status,
2302 &ae_dev->hw_err_reset_req);
2303
2304 status = le32_to_cpu(*(desc_data + 3)) & BIT(0);
2305 if (status) {
2306 dev_err(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n",
2307 status);
2308 set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
2309 }
2310
2311 status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK;
2312 if (status)
2313 hclge_log_error(dev, "SSU_COMMON_ERR_INT",
2314 &hclge_ssu_com_err_int[0], status,
2315 &ae_dev->hw_err_reset_req);
2316
2317 /* log IGU(Ingress Unit) errors */
2318 desc_data = (__le32 *)&desc[3];
2319 status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
2320 if (status)
2321 hclge_log_error(dev, "IGU_INT_STS",
2322 &hclge_igu_int[0], status,
2323 &ae_dev->hw_err_reset_req);
2324
2325 /* log PPP(Programmable Packet Process) errors */
2326 desc_data = (__le32 *)&desc[4];
2327 status = le32_to_cpu(*(desc_data + 1));
2328 if (status)
2329 hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
2330 &hclge_ppp_mpf_abnormal_int_st1[0], status,
2331 &ae_dev->hw_err_reset_req);
2332
2333 status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
2334 if (status)
2335 hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
2336 &hclge_ppp_mpf_abnormal_int_st3[0], status,
2337 &ae_dev->hw_err_reset_req);
2338
2339 /* log PPU(RCB) errors */
2340 desc_data = (__le32 *)&desc[5];
2341 status = le32_to_cpu(*(desc_data + 1));
2342 if (status) {
2343 dev_err(dev,
2344 "PPU_MPF_ABNORMAL_INT_ST1 rpu_rx_pkt_ecc_mbit_err found\n");
2345 set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
2346 }
2347
2348 status = le32_to_cpu(*(desc_data + 2));
2349 if (status)
2350 hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
2351 &hclge_ppu_mpf_abnormal_int_st2[0], status,
2352 &ae_dev->hw_err_reset_req);
2353
2354 status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK;
2355 if (status)
2356 hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
2357 &hclge_ppu_mpf_abnormal_int_st3[0], status,
2358 &ae_dev->hw_err_reset_req);
2359
2360 /* log TM(Traffic Manager) errors */
2361 desc_data = (__le32 *)&desc[6];
2362 status = le32_to_cpu(*desc_data);
2363 if (status)
2364 hclge_log_error(dev, "TM_SCH_RINT",
2365 &hclge_tm_sch_rint[0], status,
2366 &ae_dev->hw_err_reset_req);
2367
2368 /* log QCN(Quantized Congestion Control) errors */
2369 desc_data = (__le32 *)&desc[7];
2370 status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
2371 if (status)
2372 hclge_log_error(dev, "QCN_FIFO_RINT",
2373 &hclge_qcn_fifo_rint[0], status,
2374 &ae_dev->hw_err_reset_req);
2375
2376 status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
2377 if (status)
2378 hclge_log_error(dev, "QCN_ECC_RINT",
2379 &hclge_qcn_ecc_rint[0], status,
2380 &ae_dev->hw_err_reset_req);
2381
2382 /* log NCSI errors */
2383 desc_data = (__le32 *)&desc[9];
2384 status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
2385 if (status)
2386 hclge_log_error(dev, "NCSI_ECC_INT_RPT",
2387 &hclge_ncsi_err_int[0], status,
2388 &ae_dev->hw_err_reset_req);
2389
2390 /* clear all main PF RAS errors */
2391 hclge_comm_cmd_reuse_desc(&desc[0], false);
2392 ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
2393 if (ret)
2394 dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret);
2395
2396 return ret;
2397 }
2398
2399 /* hclge_handle_pf_ras_error: handle all PF RAS errors
2400 * @hdev: pointer to struct hclge_dev
2401 * @desc: descriptor for describing the command
2402 * @num: number of extended command structures
2403 *
2404 * This function handles all the PF RAS errors in the
2405 * hw registers using commands.
2406 */
2407 static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
2408 struct hclge_desc *desc,
2409 int num)
2410 {
2411 struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
2412 struct device *dev = &hdev->pdev->dev;
2413 __le32 *desc_data;
2414 u32 status;
2415 int ret;
2416
2417 /* query all PF RAS errors */
2418 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT,
2419 true);
2420 ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
2421 if (ret) {
2422 dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret);
2423 return ret;
2424 }
2425
2426 /* log SSU(Storage Switch Unit) errors */
2427 status = le32_to_cpu(desc[0].data[0]);
2428 if (status)
2429 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
2430 &hclge_ssu_port_based_err_int[0], status,
2431 &ae_dev->hw_err_reset_req);
2432
2433 status = le32_to_cpu(desc[0].data[1]);
2434 if (status)
2435 hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
2436 &hclge_ssu_fifo_overflow_int[0], status,
2437 &ae_dev->hw_err_reset_req);
2438
2439 status = le32_to_cpu(desc[0].data[2]);
2440 if (status)
2441 hclge_log_error(dev, "SSU_ETS_TCG_INT",
2442 &hclge_ssu_ets_tcg_int[0], status,
2443 &ae_dev->hw_err_reset_req);
2444
2445 /* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
2446 desc_data = (__le32 *)&desc[1];
2447 status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
2448 if (status)
2449 hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
2450 &hclge_igu_egu_tnl_int[0], status,
2451 &ae_dev->hw_err_reset_req);
2452
2453 /* log PPU(RCB) errors */
2454 desc_data = (__le32 *)&desc[3];
2455 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK;
2456 if (status) {
2457 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
2458 &hclge_ppu_pf_abnormal_int[0], status,
2459 &ae_dev->hw_err_reset_req);
2460 hclge_report_hw_error(hdev, HNAE3_PPU_POISON_ERROR);
2461 }
2462
2463 /* clear all PF RAS errors */
2464 hclge_comm_cmd_reuse_desc(&desc[0], false);
2465 ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
2466 if (ret)
2467 dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret);
2468
2469 return ret;
2470 }
2471
2472 static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
2473 {
2474 u32 mpf_bd_num, pf_bd_num, bd_num;
2475 struct hclge_desc *desc;
2476 int ret;
2477
2478 /* query the number of bds for the RAS int status */
2479 ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num);
2480 if (ret)
2481 return ret;
2482
2483 bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
2484 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
2485 if (!desc)
2486 return -ENOMEM;
2487
2488 /* handle all main PF RAS errors */
2489 ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num);
2490 if (ret) {
2491 kfree(desc);
2492 return ret;
2493 }
2494 memset(desc, 0, bd_num * sizeof(struct hclge_desc));
2495
2496 /* handle all PF RAS errors */
2497 ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num);
2498 kfree(desc);
2499
2500 return ret;
2501 }
2502
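/* Note: the three hclge_log_rocee_*_error() helpers below only dump the
 * raw ROCEE RAS status words for diagnosis; the decision about which reset
 * to request is left to hclge_log_and_clear_rocee_ras_error().
 */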
2503 static int hclge_log_rocee_axi_error(struct hclge_dev *hdev)
2504 {
2505 struct device *dev = &hdev->pdev->dev;
2506 struct hclge_desc desc[3];
2507 int ret;
2508
2509 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
2510 true);
2511 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
2512 true);
2513 hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
2514 true);
2515 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
2516 desc[1].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
2517
2518 ret = hclge_cmd_send(&hdev->hw, &desc[0], 3);
2519 if (ret) {
2520 dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret);
2521 return ret;
2522 }
2523
2524 dev_err(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n",
2525 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
2526 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
2527 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
2528 dev_err(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n",
2529 le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]),
2530 le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]),
2531 le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5]));
2532 dev_err(dev, "AXI3: %08X %08X %08X %08X\n",
2533 le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]),
2534 le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3]));
2535
2536 return 0;
2537 }
2538
2539 static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev)
2540 {
2541 struct device *dev = &hdev->pdev->dev;
2542 struct hclge_desc desc[2];
2543 int ret;
2544
2545 ret = hclge_cmd_query_error(hdev, &desc[0],
2546 HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD,
2547 HCLGE_COMM_CMD_FLAG_NEXT);
2548 if (ret) {
2549 dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret);
2550 return ret;
2551 }
2552
2553 dev_err(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n",
2554 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
2555 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
2556 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
2557 dev_err(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]),
2558 le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2]));
2559
2560 return 0;
2561 }
2562
2563 static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev)
2564 {
2565 struct device *dev = &hdev->pdev->dev;
2566 struct hclge_desc desc[2];
2567 int ret;
2568
2569 /* read overflow error status */
2570 ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD,
2571 0);
2572 if (ret) {
2573 dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret);
2574 return ret;
2575 }
2576
2577 /* log overflow error */
2578 if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
2579 const struct hclge_hw_error *err;
2580 u32 err_sts;
2581
2582 err = &hclge_rocee_qmm_ovf_err_int[0];
2583 err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK &
2584 le32_to_cpu(desc[0].data[0]);
2585 while (err->msg) {
2586 if (err->int_msk == err_sts) {
2587 dev_err(dev, "%s [error status=0x%x] found\n",
2588 err->msg,
2589 le32_to_cpu(desc[0].data[0]));
2590 break;
2591 }
2592 err++;
2593 }
2594 }
2595
2596 if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
2597 dev_err(dev, "ROCEE TSP OVF [error status=0x%x] found\n",
2598 le32_to_cpu(desc[0].data[1]));
2599 }
2600
2601 if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
2602 dev_err(dev, "ROCEE SCC OVF [error status=0x%x] found\n",
2603 le32_to_cpu(desc[0].data[2]));
2604 }
2605
2606 return 0;
2607 }
2608
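/* Note on the escalation policy below: an AXI response error requests a
 * function reset, a multi-bit ECC error requests a global reset, and any
 * failure while querying, logging or clearing the status falls back to a
 * global reset as the safe default.
 */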
2609 static enum hnae3_reset_type
2610 hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
2611 {
2612 enum hnae3_reset_type reset_type = HNAE3_NONE_RESET;
2613 struct device *dev = &hdev->pdev->dev;
2614 struct hclge_desc desc[2];
2615 unsigned int status;
2616 int ret;
2617
2618 /* read RAS error interrupt status */
2619 ret = hclge_cmd_query_error(hdev, &desc[0],
2620 HCLGE_QUERY_CLEAR_ROCEE_RAS_INT, 0);
2621 if (ret) {
2622 dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret);
2623 /* reset everything for now */
2624 return HNAE3_GLOBAL_RESET;
2625 }
2626
2627 status = le32_to_cpu(desc[0].data[0]);
2628 if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) {
2629 if (status & HCLGE_ROCEE_RERR_INT_MASK)
2630 dev_err(dev, "ROCEE RAS AXI rresp error\n");
2631
2632 if (status & HCLGE_ROCEE_BERR_INT_MASK)
2633 dev_err(dev, "ROCEE RAS AXI bresp error\n");
2634
2635 reset_type = HNAE3_FUNC_RESET;
2636
2637 hclge_report_hw_error(hdev, HNAE3_ROCEE_AXI_RESP_ERROR);
2638
2639 ret = hclge_log_rocee_axi_error(hdev);
2640 if (ret)
2641 return HNAE3_GLOBAL_RESET;
2642 }
2643
2644 if (status & HCLGE_ROCEE_ECC_INT_MASK) {
2645 dev_err(dev, "ROCEE RAS 2bit ECC error\n");
2646 reset_type = HNAE3_GLOBAL_RESET;
2647
2648 ret = hclge_log_rocee_ecc_error(hdev);
2649 if (ret)
2650 return HNAE3_GLOBAL_RESET;
2651 }
2652
2653 if (status & HCLGE_ROCEE_OVF_INT_MASK) {
2654 ret = hclge_log_rocee_ovf_error(hdev);
2655 if (ret) {
2656 dev_err(dev, "failed(%d) to process ovf error\n", ret);
2657 /* reset everything for now */
2658 return HNAE3_GLOBAL_RESET;
2659 }
2660 }
2661
2662 /* clear error status */
2663 hclge_comm_cmd_reuse_desc(&desc[0], false);
2664 ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
2665 if (ret) {
2666 dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret);
2667 /* reset everything for now */
2668 return HNAE3_GLOBAL_RESET;
2669 }
2670
2671 return reset_type;
2672 }
2673
2674 int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en)
2675 {
2676 struct device *dev = &hdev->pdev->dev;
2677 struct hclge_desc desc;
2678 int ret;
2679
2680 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 ||
2681 !hnae3_dev_roce_supported(hdev))
2682 return 0;
2683
2684 hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false);
2685 if (en) {
2686 /* enable ROCEE hw error interrupts */
2687 desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN);
2688 desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN);
2689
2690 hclge_log_and_clear_rocee_ras_error(hdev);
2691 }
2692 desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK);
2693 desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK);
2694
2695 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
2696 if (ret)
2697 dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret);
2698
2699 return ret;
2700 }
2701
2702 static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev)
2703 {
2704 struct hclge_dev *hdev = ae_dev->priv;
2705 enum hnae3_reset_type reset_type;
2706
2707 if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
2708 return;
2709
2710 reset_type = hclge_log_and_clear_rocee_ras_error(hdev);
2711 if (reset_type != HNAE3_NONE_RESET)
2712 set_bit(reset_type, &ae_dev->hw_err_reset_req);
2713 }
2714
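/* Sentinel-terminated table of hardware blocks: hclge_config_nic_hw_error()
 * walks it until the unnamed sentinel entry and invokes each block's
 * config_err_int() callback with the requested state.
 */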
2715 static const struct hclge_hw_blk hw_blk[] = {
2716 {
2717 .msk = BIT(0),
2718 .name = "IGU_EGU",
2719 .config_err_int = hclge_config_igu_egu_hw_err_int,
2720 }, {
2721 .msk = BIT(1),
2722 .name = "PPP",
2723 .config_err_int = hclge_config_ppp_hw_err_int,
2724 }, {
2725 .msk = BIT(2),
2726 .name = "SSU",
2727 .config_err_int = hclge_config_ssu_hw_err_int,
2728 }, {
2729 .msk = BIT(3),
2730 .name = "PPU",
2731 .config_err_int = hclge_config_ppu_hw_err_int,
2732 }, {
2733 .msk = BIT(4),
2734 .name = "TM",
2735 .config_err_int = hclge_config_tm_hw_err_int,
2736 }, {
2737 .msk = BIT(5),
2738 .name = "COMMON",
2739 .config_err_int = hclge_config_common_hw_err_int,
2740 }, {
2741 .msk = BIT(8),
2742 .name = "MAC",
2743 .config_err_int = hclge_config_mac_err_int,
2744 }, {
2745 /* sentinel */
2746 }
2747 };
2748
2749 static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable)
2750 {
2751 u32 reg_val;
2752
2753 reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG);
2754
2755 if (enable)
2756 reg_val |= BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);
2757 else
2758 reg_val &= ~BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);
2759
2760 hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, reg_val);
2761 }
2762
2763 int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state)
2764 {
2765 const struct hclge_hw_blk *module = hw_blk;
2766 int ret = 0;
2767
2768 hclge_config_all_msix_error(hdev, state);
2769
2770 while (module->name) {
2771 if (module->config_err_int) {
2772 ret = module->config_err_int(hdev, state);
2773 if (ret)
2774 return ret;
2775 }
2776 module++;
2777 }
2778
2779 return ret;
2780 }
2781
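/* Note: hw_err_reset_req is zeroed only when the RAS status register shows
 * NFE or ROCEE errors; otherwise the handler reports
 * PCI_ERS_RESULT_RECOVERED straight away. A reset is requested only if the
 * error handlers left bits set in hw_err_reset_req.
 */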
2782 pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
2783 {
2784 struct hclge_dev *hdev = ae_dev->priv;
2785 struct device *dev = &hdev->pdev->dev;
2786 u32 status;
2787
2788 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
2789 dev_err(dev,
2790 "Can't recover - RAS error reported during dev init\n");
2791 return PCI_ERS_RESULT_NONE;
2792 }
2793
2794 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
2795 if (status & HCLGE_RAS_REG_NFE_MASK ||
2796 status & HCLGE_RAS_REG_ROCEE_ERR_MASK)
2797 ae_dev->hw_err_reset_req = 0;
2798 else
2799 goto out;
2800
2801 /* Handling Non-fatal HNS RAS errors */
2802 if (status & HCLGE_RAS_REG_NFE_MASK) {
2803 dev_err(dev,
2804 "HNS Non-Fatal RAS error(status=0x%x) identified\n",
2805 status);
2806 hclge_handle_all_ras_errors(hdev);
2807 }
2808
2809 /* Handling Non-fatal Rocee RAS errors */
2810 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 &&
2811 status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
2812 dev_err(dev, "ROCEE Non-Fatal RAS error identified\n");
2813 hclge_handle_rocee_ras_error(ae_dev);
2814 }
2815
2816 if (ae_dev->hw_err_reset_req)
2817 return PCI_ERS_RESULT_NEED_RESET;
2818
2819 out:
2820 return PCI_ERS_RESULT_RECOVERED;
2821 }
2822
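/* Note: instead of building fresh descriptors, the clear path below reuses
 * the descriptor array from the preceding query and only rewrites the
 * opcode and the flag word (no-interrupt, inbound) before resending it.
 */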
2823 static int hclge_clear_hw_msix_error(struct hclge_dev *hdev,
2824 struct hclge_desc *desc, bool is_mpf,
2825 u32 bd_num)
2826 {
2827 if (is_mpf)
2828 desc[0].opcode =
2829 cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT);
2830 else
2831 desc[0].opcode = cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT);
2832
2833 desc[0].flag = cpu_to_le16(HCLGE_COMM_CMD_FLAG_NO_INTR |
2834 HCLGE_COMM_CMD_FLAG_IN);
2835
2836 return hclge_cmd_send(&hdev->hw, &desc[0], bd_num);
2837 }
2838
2839 /* hclge_query_over_8bd_err_info: query information about over_8bd_nfe_err
2840 * @hdev: pointer to struct hclge_dev
2841 * @vf_id: Index of the virtual function with error
2842 * @q_id: Physical index of the queue with error
2843 *
2844 * This function gets the specific index of the queue and function which
2845 * caused over_8bd_nfe_err by using a command. If vf_id is 0, it means the
2846 * error was caused by the PF instead of a VF.
2847 */
2848 static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id,
2849 u16 *q_id)
2850 {
2851 struct hclge_query_ppu_pf_other_int_dfx_cmd *req;
2852 struct hclge_desc desc;
2853 int ret;
2854
2855 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true);
2856 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
2857 if (ret)
2858 return ret;
2859
2860 req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data;
2861 *vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id);
2862 *q_id = le16_to_cpu(req->over_8bd_no_fe_qid);
2863
2864 return 0;
2865 }
2866
2867 /* hclge_handle_over_8bd_err: handle MSI-X error named over_8bd_nfe_err
2868 * @hdev: pointer to struct hclge_dev
2869 * @reset_requests: reset level that we need to trigger later
2870 *
2871 * over_8bd_nfe_err is a special MSI-X error because it may be caused by a VF;
2872 * in that case, we need to trigger a VF reset. Otherwise, a PF reset is needed.
2873 */
2874 static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
2875 unsigned long *reset_requests)
2876 {
2877 struct device *dev = &hdev->pdev->dev;
2878 u16 vf_id;
2879 u16 q_id;
2880 int ret;
2881
2882 ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id);
2883 if (ret) {
2884 dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n",
2885 ret);
2886 return;
2887 }
2888
2889 dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vport(%u), queue_id(%u)\n",
2890 vf_id, q_id);
2891
2892 if (vf_id) {
2893 if (vf_id >= hdev->num_alloc_vport) {
2894 dev_err(dev, "invalid vport(%u)\n", vf_id);
2895 return;
2896 }
2897
2898 /* If we need to trigger another reset whose level is higher
2899 * than HNAE3_VF_FUNC_RESET, there is no need to trigger a VF
2900 * reset here.
2901 */
2902 if (*reset_requests != 0)
2903 return;
2904
2905 ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]);
2906 if (ret)
2907 dev_err(dev, "inform reset to vport(%u) failed %d!\n",
2908 vf_id, ret);
2909 } else {
2910 set_bit(HNAE3_FUNC_RESET, reset_requests);
2911 }
2912 }
2913
2914 /* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors
2915 * @hdev: pointer to struct hclge_dev
2916 * @desc: descriptor for describing the command
2917 * @mpf_bd_num: number of extended command structures
2918 * @reset_requests: record of the reset level that we need
2919 *
2920 * This function handles all the main PF MSI-X errors in the hw registers
2921 * using commands.
2922 */
2923 static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev,
2924 struct hclge_desc *desc,
2925 int mpf_bd_num,
2926 unsigned long *reset_requests)
2927 {
2928 struct device *dev = &hdev->pdev->dev;
2929 __le32 *desc_data;
2930 u32 status;
2931 int ret;
2932 /* query all main PF MSIx errors */
2933 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
2934 true);
2935 ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
2936 if (ret) {
2937 dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret);
2938 return ret;
2939 }
2940
2941 /* log MAC errors */
2942 desc_data = (__le32 *)&desc[1];
2943 status = le32_to_cpu(*desc_data);
2944 if (status)
2945 hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
2946 &hclge_mac_afifo_tnl_int[0], status,
2947 reset_requests);
2948
2949 /* log PPU(RCB) MPF errors */
2950 desc_data = (__le32 *)&desc[5];
2951 status = le32_to_cpu(*(desc_data + 2)) &
2952 HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
2953 if (status)
2954 dev_err(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x\n]",
2955 status);
2956
2957 /* clear all main PF MSIx errors */
2958 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
2959 if (ret)
2960 dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret);
2961
2962 return ret;
2963 }
2964
2965 /* hclge_handle_pf_msix_error: handle all PF MSI-X errors
2966 * @hdev: pointer to struct hclge_dev
2967 * @desc: descriptor for describing the command
2968 * @pf_bd_num: number of extended command structures
2969 * @reset_requests: record of the reset level that we need
2970 *
2971 * This function handles all the PF MSI-X errors in the hw registers using
2972 * commands.
2973 */
2974 static int hclge_handle_pf_msix_error(struct hclge_dev *hdev,
2975 struct hclge_desc *desc,
2976 int pf_bd_num,
2977 unsigned long *reset_requests)
2978 {
2979 struct device *dev = &hdev->pdev->dev;
2980 __le32 *desc_data;
2981 u32 status;
2982 int ret;
2983
2984 /* query all PF MSIx errors */
2985 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
2986 true);
2987 ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num);
2988 if (ret) {
2989 dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret);
2990 return ret;
2991 }
2992
2993 /* log SSU PF errors */
2994 status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK;
2995 if (status)
2996 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
2997 &hclge_ssu_port_based_pf_int[0],
2998 status, reset_requests);
2999
3000 /* read and log PPP PF errors */
3001 desc_data = (__le32 *)&desc[2];
3002 status = le32_to_cpu(*desc_data);
3003 if (status)
3004 hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
3005 &hclge_ppp_pf_abnormal_int[0],
3006 status, reset_requests);
3007
3008 /* log PPU(RCB) PF errors */
3009 desc_data = (__le32 *)&desc[3];
3010 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK;
3011 if (status)
3012 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
3013 &hclge_ppu_pf_abnormal_int[0],
3014 status, reset_requests);
3015
3016 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK;
3017 if (status)
3018 hclge_handle_over_8bd_err(hdev, reset_requests);
3019
3020 /* clear all PF MSIx errors */
3021 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
3022 if (ret)
3023 dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret);
3024
3025 return ret;
3026 }
3027
3028 static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
3029 unsigned long *reset_requests)
3030 {
3031 u32 mpf_bd_num, pf_bd_num, bd_num;
3032 struct hclge_desc *desc;
3033 int ret;
3034
3035 /* query the number of bds for the MSIx int status */
3036 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
3037 if (ret)
3038 goto out;
3039
3040 bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
3041 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
3042 if (!desc)
3043 return -ENOMEM;
3044
3045 ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num,
3046 reset_requests);
3047 if (ret)
3048 goto msi_error;
3049
3050 memset(desc, 0, bd_num * sizeof(struct hclge_desc));
3051 ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests);
3052 if (ret)
3053 goto msi_error;
3054
3055 ret = hclge_handle_mac_tnl(hdev);
3056
3057 msi_error:
3058 kfree(desc);
3059 out:
3060 return ret;
3061 }
3062
3063 int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
3064 unsigned long *reset_requests)
3065 {
3066 struct device *dev = &hdev->pdev->dev;
3067
3068 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
3069 dev_err(dev,
3070 "failed to handle msix error during dev init\n");
3071 return -EAGAIN;
3072 }
3073
3074 return hclge_handle_all_hw_msix_error(hdev, reset_requests);
3075 }
3076
3077 int hclge_handle_mac_tnl(struct hclge_dev *hdev)
3078 {
3079 struct hclge_mac_tnl_stats mac_tnl_stats;
3080 struct device *dev = &hdev->pdev->dev;
3081 struct hclge_desc desc;
3082 u32 status;
3083 int ret;
3084
3085 /* query and clear mac tnl interruptions */
3086 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true);
3087 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
3088 if (ret) {
3089 dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret);
3090 return ret;
3091 }
3092
3093 status = le32_to_cpu(desc.data[0]);
3094 if (status) {
3095 /* When a mac tnl interrupt occurs, we record the current time and
3096 * register status here in a fifo, then clear the status, so that
3097 * if the link status changes suddenly at some time, they can be
3098 * queried via debugfs.
3099 */
3100 mac_tnl_stats.time = local_clock();
3101 mac_tnl_stats.status = status;
3102 kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
3103 ret = hclge_clear_mac_tnl_int(hdev);
3104 if (ret)
3105 dev_err(dev, "failed to clear mac tnl int, ret = %d.\n",
3106 ret);
3107 }
3108
3109 return ret;
3110 }
3111
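/* Note: during init the descriptor payloads below are filled with 0xFF
 * before the clear commands, so every possible msix status bit gets
 * cleared; this appears to rely on write-one-to-clear semantics in the
 * firmware interface.
 */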
3112 void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
3113 {
3114 struct hclge_dev *hdev = ae_dev->priv;
3115 struct device *dev = &hdev->pdev->dev;
3116 u32 mpf_bd_num, pf_bd_num, bd_num;
3117 struct hclge_desc *desc;
3118 u32 status;
3119 int ret;
3120
3121 ae_dev->hw_err_reset_req = 0;
3122 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
3123
3124 /* query the number of bds for the MSIx int status */
3125 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
3126 if (ret)
3127 return;
3128
3129 bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
3130 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
3131 if (!desc)
3132 return;
3133
3134 /* Clear HNS hw errors reported through msix */
3135 memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) -
3136 HCLGE_DESC_NO_DATA_LEN);
3137 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
3138 if (ret) {
3139 dev_err(dev, "fail(%d) to clear mpf msix int during init\n",
3140 ret);
3141 goto msi_error;
3142 }
3143
3144 memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) -
3145 HCLGE_DESC_NO_DATA_LEN);
3146 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
3147 if (ret) {
3148 dev_err(dev, "fail(%d) to clear pf msix int during init\n",
3149 ret);
3150 goto msi_error;
3151 }
3152
3153 /* Handle Non-fatal HNS RAS errors */
3154 if (status & HCLGE_RAS_REG_NFE_MASK) {
3155 dev_err(dev, "HNS hw error(RAS) identified during init\n");
3156 hclge_handle_all_ras_errors(hdev);
3157 }
3158
3159 msi_error:
3160 kfree(desc);
3161 }
3162
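/* Note: an error source is considered present if either the misc vector's
 * MSI-X status bits or the PF "other interrupt" RAS status bits are set,
 * since errors may be reported through either path.
 */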
3163 bool hclge_find_error_source(struct hclge_dev *hdev)
3164 {
3165 u32 msix_src_flag, hw_err_src_flag;
3166
3167 msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) &
3168 HCLGE_VECTOR0_REG_MSIX_MASK;
3169
3170 hw_err_src_flag = hclge_read_dev(&hdev->hw,
3171 HCLGE_RAS_PF_OTHER_INT_STS_REG) &
3172 HCLGE_RAS_REG_ERR_MASK;
3173
3174 return msix_src_flag || hw_err_src_flag;
3175 }
3176
3177 void hclge_handle_occurred_error(struct hclge_dev *hdev)
3178 {
3179 struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
3180
3181 if (hclge_find_error_source(hdev))
3182 hclge_handle_error_info_log(ae_dev);
3183 }
3184
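/* Note on the type_id encoding decoded below: the low seven bits
 * (HCLGE_ERR_TYPE_MASK) select the error type, while bit 7
 * (HCLGE_ERR_TYPE_IS_RAS_OFFSET) flags whether the record came from the
 * RAS path or the MSI-X path.
 */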
3185 static bool
3186 hclge_handle_error_type_reg_log(struct hclge_dev *hdev,
3187 struct hclge_mod_err_info *mod_info,
3188 struct hclge_type_reg_err_info *type_reg_info)
3189 {
3190 #define HCLGE_ERR_TYPE_MASK 0x7F
3191 #define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7
3192
3193 u8 mod_id, total_module, type_id, total_type, i, is_ras;
3194 struct device *dev = &hdev->pdev->dev;
3195 u8 index_module = MODULE_NONE;
3196 u8 index_type = NONE_ERROR;
3197 bool cause_by_vf = false;
3198
3199 mod_id = mod_info->mod_id;
3200 type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK;
3201 is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET;
3202
3203 total_module = ARRAY_SIZE(hclge_hw_module_id_st);
3204 total_type = ARRAY_SIZE(hclge_hw_type_id_st);
3205
3206 for (i = 0; i < total_module; i++) {
3207 if (mod_id == hclge_hw_module_id_st[i].module_id) {
3208 index_module = i;
3209 break;
3210 }
3211 }
3212
3213 for (i = 0; i < total_type; i++) {
3214 if (type_id == hclge_hw_type_id_st[i].type_id) {
3215 index_type = i;
3216 cause_by_vf = hclge_hw_type_id_st[i].cause_by_vf;
3217 break;
3218 }
3219 }
3220
3221 if (index_module != MODULE_NONE && index_type != NONE_ERROR)
3222 dev_err(dev,
3223 "found %s %s, is %s error.\n",
3224 hclge_hw_module_id_st[index_module].msg,
3225 hclge_hw_type_id_st[index_type].msg,
3226 is_ras ? "ras" : "msix");
3227 else
3228 dev_err(dev,
3229 "unknown module[%u] or type[%u].\n", mod_id, type_id);
3230
3231 dev_err(dev, "reg_value:\n");
3232 for (i = 0; i < type_reg_info->reg_num; i++)
3233 dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]);
3234
3235 if (hclge_hw_module_id_st[index_module].query_reg_info)
3236 hclge_hw_module_id_st[index_module].query_reg_info(hdev);
3237
3238 return cause_by_vf;
3239 }
3240
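/* Note on the buffer layout parsed below: word 0 is a summary record
 * (reset type and module count), each module then contributes one header
 * word carrying err_num, and each error one type/reg header word followed
 * by reg_num raw register values.
 */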
3241 static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
3242 const u32 *buf, u32 buf_size)
3243 {
3244 struct hclge_type_reg_err_info *type_reg_info;
3245 struct hclge_dev *hdev = ae_dev->priv;
3246 struct device *dev = &hdev->pdev->dev;
3247 struct hclge_mod_err_info *mod_info;
3248 struct hclge_sum_err_info *sum_info;
3249 bool cause_by_vf = false;
3250 u8 mod_num, err_num, i;
3251 u32 offset = 0;
3252
3253 sum_info = (struct hclge_sum_err_info *)&buf[offset++];
3254 if (sum_info->reset_type &&
3255 sum_info->reset_type != HNAE3_NONE_RESET)
3256 set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req);
3257 mod_num = sum_info->mod_num;
3258
3259 while (mod_num--) {
3260 if (offset >= buf_size) {
3261 dev_err(dev, "The offset(%u) exceeds buf's size(%u).\n",
3262 offset, buf_size);
3263 return;
3264 }
3265 mod_info = (struct hclge_mod_err_info *)&buf[offset++];
3266 err_num = mod_info->err_num;
3267
3268 for (i = 0; i < err_num; i++) {
3269 if (offset >= buf_size) {
3270 dev_err(dev,
3271 "The offset(%u) exceeds buf size(%u).\n",
3272 offset, buf_size);
3273 return;
3274 }
3275
3276 type_reg_info = (struct hclge_type_reg_err_info *)
3277 &buf[offset++];
3278 if (hclge_handle_error_type_reg_log(hdev, mod_info,
3279 type_reg_info))
3280 cause_by_vf = true;
3281
3282 offset += type_reg_info->reg_num;
3283 }
3284 }
3285
3286 if (hnae3_ae_dev_vf_fault_supported(hdev->ae_dev) && cause_by_vf)
3287 set_bit(HNAE3_VF_EXP_RESET, &ae_dev->hw_err_reset_req);
3288 }
3289
3290 static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num)
3291 {
3292 struct device *dev = &hdev->pdev->dev;
3293 struct hclge_desc desc_bd;
3294 int ret;
3295
3296 hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_ALL_ERR_BD_NUM, true);
3297 ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
3298 if (ret) {
3299 dev_err(dev, "failed to query error bd_num, ret = %d.\n", ret);
3300 return ret;
3301 }
3302
3303 *bd_num = le32_to_cpu(desc_bd.data[0]);
3304 if (!(*bd_num)) {
3305 dev_err(dev, "The value of bd_num is 0!\n");
3306 return -EINVAL;
3307 }
3308
3309 return 0;
3310 }
3311
3312 static int hclge_query_all_err_info(struct hclge_dev *hdev,
3313 struct hclge_desc *desc, u32 bd_num)
3314 {
3315 struct device *dev = &hdev->pdev->dev;
3316 int ret;
3317
3318 hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true);
3319 ret = hclge_cmd_send(&hdev->hw, desc, bd_num);
3320 if (ret)
3321 dev_err(dev, "failed to query error info, ret = %d.\n", ret);
3322
3323 return ret;
3324 }
3325
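/* Note: the reply is staged into a scratch desc_data buffer and converted
 * word by word with le32_to_cpu() before parsing, since the module log is
 * interpreted as host-endian u32 words.
 */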
3326 int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev)
3327 {
3328 u32 bd_num, desc_len, buf_len, buf_size, i;
3329 struct hclge_dev *hdev = ae_dev->priv;
3330 struct hclge_desc *desc;
3331 __le32 *desc_data;
3332 u32 *buf;
3333 int ret;
3334
3335 ret = hclge_query_all_err_bd_num(hdev, &bd_num);
3336 if (ret)
3337 goto out;
3338
3339 desc_len = bd_num * sizeof(struct hclge_desc);
3340 desc = kzalloc(desc_len, GFP_KERNEL);
3341 if (!desc) {
3342 ret = -ENOMEM;
3343 goto out;
3344 }
3345
3346 ret = hclge_query_all_err_info(hdev, desc, bd_num);
3347 if (ret)
3348 goto err_desc;
3349
3350 buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN;
3351 buf_size = buf_len / sizeof(u32);
3352
3353 desc_data = kzalloc(buf_len, GFP_KERNEL);
3354 if (!desc_data) {
3355 ret = -ENOMEM;
3356 goto err_desc;
3357 }
3358
3359 buf = kzalloc(buf_len, GFP_KERNEL);
3360 if (!buf) {
3361 ret = -ENOMEM;
3362 goto err_buf_alloc;
3363 }
3364
3365 memcpy(desc_data, &desc[0].data[0], buf_len);
3366 for (i = 0; i < buf_size; i++)
3367 buf[i] = le32_to_cpu(desc_data[i]);
3368
3369 hclge_handle_error_module_log(ae_dev, buf, buf_size);
3370 kfree(buf);
3371
3372 err_buf_alloc:
3373 kfree(desc_data);
3374 err_desc:
3375 kfree(desc);
3376 out:
3377 return ret;
3378 }
3379
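/* Note: walks the VF fault bitmap; if the first set bit is the PF's
 * (PF_VPORT_ID) the fault is not treated as VF-caused. Each faulting VF
 * gets its queues reset and is told to perform a function reset; returns
 * true if at least one VF was handled.
 */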
3380 static bool hclge_reset_vf_in_bitmap(struct hclge_dev *hdev,
3381 unsigned long *bitmap)
3382 {
3383 struct hclge_vport *vport;
3384 bool exist_set = false;
3385 int func_id;
3386 int ret;
3387
3388 func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM);
3389 if (func_id == PF_VPORT_ID)
3390 return false;
3391
3392 while (func_id != HCLGE_VPORT_NUM) {
3393 vport = hclge_get_vf_vport(hdev,
3394 func_id - HCLGE_VF_VPORT_START_NUM);
3395 if (!vport) {
3396 dev_err(&hdev->pdev->dev, "invalid func id(%d)\n",
3397 func_id);
3398 return false;
3399 }
3400
3401 dev_info(&hdev->pdev->dev, "do function %d recovery.\n", func_id);
3402
3403 ret = hclge_reset_tqp(&vport->nic);
3404 if (ret) {
3405 dev_err(&hdev->pdev->dev,
3406 "failed to reset tqp, ret = %d.", ret);
3407 return false;
3408 }
3409
3410 ret = hclge_inform_vf_reset(vport, HNAE3_VF_FUNC_RESET);
3411 if (ret) {
3412 dev_err(&hdev->pdev->dev,
3413 "failed to reset func %d, ret = %d.",
3414 func_id, ret);
3415 return false;
3416 }
3417
3418 exist_set = true;
3419 clear_bit(func_id, bitmap);
3420 func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM);
3421 }
3422
3423 return exist_set;
3424 }
3425
3426 static void hclge_get_vf_fault_bitmap(struct hclge_desc *desc,
3427 unsigned long *bitmap)
3428 {
3429 #define HCLGE_FIR_FAULT_BYTES 24
3430 #define HCLGE_SEC_FAULT_BYTES 8
3431
3432 u8 *buff;
3433
3434 BUILD_BUG_ON(HCLGE_FIR_FAULT_BYTES + HCLGE_SEC_FAULT_BYTES !=
3435 BITS_TO_BYTES(HCLGE_VPORT_NUM));
3436
3437 memcpy(bitmap, desc[0].data, HCLGE_FIR_FAULT_BYTES);
3438 buff = (u8 *)bitmap + HCLGE_FIR_FAULT_BYTES;
3439 memcpy(buff, desc[1].data, HCLGE_SEC_FAULT_BYTES);
3440 }
3441
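/* Note: the VF fault bitmap spans two descriptors (24 bytes in the first,
 * 8 in the second); the BUILD_BUG_ON in hclge_get_vf_fault_bitmap() keeps
 * that split in sync with BITS_TO_BYTES(HCLGE_VPORT_NUM).
 */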
3442 int hclge_handle_vf_queue_err_ras(struct hclge_dev *hdev)
3443 {
3444 unsigned long vf_fault_bitmap[BITS_TO_LONGS(HCLGE_VPORT_NUM)];
3445 struct hclge_desc desc[2];
3446 bool cause_by_vf = false;
3447 int ret;
3448
3449 if (!test_and_clear_bit(HNAE3_VF_EXP_RESET,
3450 &hdev->ae_dev->hw_err_reset_req) ||
3451 !hnae3_ae_dev_vf_fault_supported(hdev->ae_dev))
3452 return 0;
3453
3454 hclge_comm_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_GET_QUEUE_ERR_VF,
3455 true);
3456 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
3457 hclge_comm_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_GET_QUEUE_ERR_VF,
3458 true);
3459
3460 ret = hclge_comm_cmd_send(&hdev->hw.hw, desc, 2);
3461 if (ret) {
3462 dev_err(&hdev->pdev->dev,
3463 "failed to get vf bitmap, ret = %d.\n", ret);
3464 return ret;
3465 }
3466 hclge_get_vf_fault_bitmap(desc, vf_fault_bitmap);
3467
3468 cause_by_vf = hclge_reset_vf_in_bitmap(hdev, vf_fault_bitmap);
3469 if (cause_by_vf)
3470 hdev->ae_dev->hw_err_reset_req = 0;
3471
3472 return 0;
3473 }
3474