// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/* Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include "erdma.h"

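/* Write the CQ doorbell record and ring the CQ doorbell with the ARM bit
 * set, so the device reports the next command completion via an event.
 */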
static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
		      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
		      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
		      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

	*cmdq->cq.dbrec = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

	atomic64_inc(&cmdq->cq.armed_num);
}

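/* Publish the new SQ producer index through the doorbell record and the SQ
 * doorbell register so the device starts fetching the queued command.
 */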
static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

	*cmdq->sq.dbrec = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}

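/* Grab a free completion-wait context from the pool, or return -ENOMEM when
 * all contexts are in use.
 */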
static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
	int comp_idx;

	spin_lock(&cmdq->lock);
	comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
				       cmdq->max_outstandings);
	if (comp_idx == cmdq->max_outstandings) {
		spin_unlock(&cmdq->lock);
		return ERR_PTR(-ENOMEM);
	}

	__set_bit(comp_idx, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	return &cmdq->wait_pool[comp_idx];
}

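/* Reset a completion-wait context and return it to the pool. */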
static void put_comp_wait(struct erdma_cmdq *cmdq,
			  struct erdma_comp_wait *comp_wait)
{
	int used;

	cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
	spin_lock(&cmdq->lock);
	used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	WARN_ON(!used);
}

static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
				    struct erdma_cmdq *cmdq)
{
	int i;

	cmdq->wait_pool =
		devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
			     sizeof(struct erdma_comp_wait), GFP_KERNEL);
	if (!cmdq->wait_pool)
		return -ENOMEM;

	spin_lock_init(&cmdq->lock);
	cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
		&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
	if (!cmdq->comp_wait_bitmap)
		return -ENOMEM;

	for (i = 0; i < cmdq->max_outstandings; i++) {
		init_completion(&cmdq->wait_pool[i].wait_event);
		cmdq->wait_pool[i].ctx_id = i;
	}

	return 0;
}

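/* Allocate the command SQ buffer and its doorbell record, and program the
 * queue base address, depth and doorbell-record address into the device.
 */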
static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_sq *sq = &cmdq->sq;

	sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
	sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

	sq->qbuf = dma_alloc_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
				      &sq->qbuf_dma_addr, GFP_KERNEL);
	if (!sq->qbuf)
		return -ENOMEM;

	sq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &sq->dbrec_dma);
	if (!sq->dbrec)
		goto err_out;

	spin_lock_init(&sq->lock);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
			  upper_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
			  lower_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG, sq->dbrec_dma);

	return 0;

err_out:
	dma_free_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
			  sq->qbuf, sq->qbuf_dma_addr);

	return -ENOMEM;
}

static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_cq *cq = &cmdq->cq;

	cq->depth = cmdq->sq.depth;
	cq->qbuf = dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
				      &cq->qbuf_dma_addr, GFP_KERNEL);
	if (!cq->qbuf)
		return -ENOMEM;

	spin_lock_init(&cq->lock);

	cq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &cq->dbrec_dma);
	if (!cq->dbrec)
		goto err_out;

	atomic64_set(&cq->armed_num, 0);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
			  upper_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
			  lower_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG, cq->dbrec_dma);

	return 0;

err_out:
	dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, cq->qbuf,
			  cq->qbuf_dma_addr);

	return -ENOMEM;
}

static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_eq *eq = &cmdq->eq;
	int ret;

	ret = erdma_eq_common_init(dev, eq, cmdq->max_outstandings);
	if (ret)
		return ret;

	eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG;

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
			  upper_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
			  lower_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG, eq->dbrec_dma);

	return 0;
}

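/* Set up the command queue: the wait-context pool and the SQ, CQ and EQ.
 * The cmdq starts in polling mode; erdma_finish_cmdq_init() switches it to
 * event mode once device initialization has completed.
 */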
int erdma_cmdq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	int err;

	cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
	cmdq->use_event = false;

	sema_init(&cmdq->credits, cmdq->max_outstandings);

	err = erdma_cmdq_wait_res_init(dev, cmdq);
	if (err)
		return err;

	err = erdma_cmdq_sq_init(dev);
	if (err)
		return err;

	err = erdma_cmdq_cq_init(dev);
	if (err)
		goto err_destroy_sq;

	err = erdma_cmdq_eq_init(dev);
	if (err)
		goto err_destroy_cq;

	set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	return 0;

err_destroy_cq:
	dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);

err_destroy_sq:
	dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);

	return err;
}

void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
	/* After the device has been initialized successfully, switch the
	 * cmdq to event mode.
	 */
	dev->cmdq.use_event = true;
	arm_cmdq_cq(&dev->cmdq);
}

void erdma_cmdq_destroy(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;

	clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	erdma_eq_destroy(dev, &cmdq->eq);

	dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);

	dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

	dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);
}

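/* Return the CQE at the current consumer index if its owner bit indicates a
 * new entry for the current pass over the queue, or NULL if no new CQE is
 * available.
 */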
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
	__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
				      cmdq->cq.depth, CQE_SHIFT);
	u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
			      be32_to_cpu(READ_ONCE(*cqe)));

	return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}

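/* Copy the request into the next SQ entry, fill the WQEBB index, wait-context
 * cookie and WQEBB count into the command header, then ring the SQ doorbell.
 */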
static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
			  struct erdma_comp_wait *comp_wait)
{
	__le64 *wqe;
	u64 hdr = *req;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
	reinit_completion(&comp_wait->wait_event);
	comp_wait->sq_pi = cmdq->sq.pi;

	wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
			      SQEBB_SHIFT);
	memcpy(wqe, req, req_len);

	cmdq->sq.pi += cmdq->sq.wqebb_cnt;
	hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
	       FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
			  comp_wait->ctx_id) |
	       FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
	*wqe = cpu_to_le64(hdr);

	kick_cmdq_db(cmdq);
}

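/* Consume one valid CQE: look up the issuing wait context via the cookie
 * stored in the SQE, record the completion status and completion data, and
 * wake the waiter when the cmdq runs in event mode.
 */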
static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
	struct erdma_comp_wait *comp_wait;
	u32 hdr0, sqe_idx;
	__be32 *cqe;
	u16 ctx_id;
	u64 *sqe;

	cqe = get_next_valid_cmdq_cqe(cmdq);
	if (!cqe)
		return -EAGAIN;

	cmdq->cq.ci++;

	dma_rmb();
	hdr0 = be32_to_cpu(*cqe);
	sqe_idx = be32_to_cpu(*(cqe + 1));

	sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
			      SQEBB_SHIFT);
	ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
	comp_wait = &cmdq->wait_pool[ctx_id];
	if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
		return -EIO;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
	comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
	cmdq->sq.ci += cmdq->sq.wqebb_cnt;
	/* Copy the 16-byte completion data that follows the CQE header to
	 * the waiter.
	 */
	be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4);

	if (cmdq->use_event)
		complete(&comp_wait->wait_event);

	return 0;
}

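/* Drain all currently available command completions under the CQ lock and
 * re-arm the CQ when running in event mode.
 */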
static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
	unsigned long flags;
	u16 comp_num;

	spin_lock_irqsave(&cmdq->cq.lock, flags);

	/* There can be at most max_outstandings completions pending at any
	 * one time, so that bounds the polling loop.
	 */
	for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
		if (erdma_poll_single_cmd_completion(cmdq))
			break;

	if (comp_num && cmdq->use_event)
		arm_cmdq_cq(cmdq);

	spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}

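/* Interrupt-driven completion path: consume the pending EQ entries, poll the
 * corresponding CQ completions, and notify the EQ about the consumed events.
 */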
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
	int got_event = 0;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
	    !cmdq->use_event)
		return;

	while (get_next_valid_eqe(&cmdq->eq)) {
		cmdq->eq.ci++;
		got_event++;
	}

	if (got_event) {
		cmdq->cq.cmdsn++;
		erdma_polling_cmd_completions(cmdq);
	}

	notify_eq(&cmdq->eq);
}

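/* Busy-poll for the completion of a single command, sleeping between polls,
 * until the command finishes or the timeout expires.
 */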
static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

	while (1) {
		erdma_polling_cmd_completions(cmdq);
		if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
			break;

		if (time_is_before_jiffies(comp_timeout))
			return -ETIME;

		msleep(20);
	}

	return 0;
}

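/* Sleep on the wait context until the interrupt path completes the command;
 * mark the command as timed out if it has not finished within the timeout.
 */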
static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long flags = 0;

	wait_for_completion_timeout(&comp_ctx->wait_event,
				    msecs_to_jiffies(timeout));

	if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
		spin_lock_irqsave(&cmdq->cq.lock, flags);
		comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
		spin_unlock_irqrestore(&cmdq->cq.lock, flags);
		return -ETIME;
	}

	return 0;
}

void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
	*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
	       FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}

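/* Post a command to the cmdq and wait for its completion, either by polling
 * or by event depending on the cmdq mode. On success, the first 16 bytes of
 * completion data are optionally returned through resp0 and resp1.
 */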
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
			u64 *resp0, u64 *resp1)
{
	struct erdma_comp_wait *comp_wait;
	int ret;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
		return -ENODEV;

	down(&cmdq->credits);

	comp_wait = get_comp_wait(cmdq);
	if (IS_ERR(comp_wait)) {
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
		up(&cmdq->credits);
		return PTR_ERR(comp_wait);
	}

	spin_lock(&cmdq->sq.lock);
	push_cmdq_sqe(cmdq, req, req_size, comp_wait);
	spin_unlock(&cmdq->sq.lock);

	if (cmdq->use_event)
		ret = erdma_wait_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);
	else
		ret = erdma_poll_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);

	if (ret) {
		set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		goto out;
	}

	if (comp_wait->comp_status)
		ret = -EIO;

	if (resp0 && resp1) {
		*resp0 = *((u64 *)&comp_wait->comp_data[0]);
		*resp1 = *((u64 *)&comp_wait->comp_data[2]);
	}
	put_comp_wait(cmdq, comp_wait);

out:
	up(&cmdq->credits);

	return ret;
}