// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

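/* Number of references on the buffer's page that are held outside the
 * driver, i.e. the page refcount minus the driver's reserved bias.
 */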
static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

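/* Release the driver's bias references on the page and, if requested,
 * unmap and free the page itself. Callers pass free_page only in RDA mode;
 * QPL pages are freed elsewhere.
 */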
static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs,
			      bool free_page)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	if (free_page)
		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
			      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

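/* Pop a buffer state off the free list, or return NULL if the list is
 * empty. The entry is marked as allocated by pointing its next index at
 * itself.
 */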
static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

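/* Remove and return the buffer state at the head of @list, or NULL if the
 * list is empty.
 */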
static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

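/* Append @buf_state to the tail of @list. */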
static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

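/* Find a buffer whose page can be reposted to the device. Recycled buffers
 * are used first; otherwise a limited number of used buffers are scanned
 * for pages with no outstanding SKB references. In RDA mode, when no free
 * buffer states remain, one used entry is reclaimed (its page is dropped if
 * still referenced) so a fresh page can be allocated. Returns NULL if
 * nothing is immediately available.
 */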
static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states are only usable when ref count reaches 0, which means
	 * no SKBs refer to them.
	 *
	 * Search a limited number before giving up.
	 */
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0) {
			rx->dqo.used_buf_states_cnt--;
			return buf_state;
		}

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* For QPL, we cannot allocate any new buffers and must
	 * wait for the existing ones to be available.
	 */
	if (rx->dqo.qpl)
		return NULL;

	/* If there are no free buf states discard an entry from
	 * `used_buf_states` so it can be used.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state, true);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

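/* Attach a page to @buf_state: allocate and DMA-map a fresh page in RDA
 * mode, or take the next unused page from the queue page list in QPL mode.
 * The page refcount is raised to INT_MAX and tracked via pagecnt_bias so
 * recycling decisions can be made cheaply.
 */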
static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	struct gve_priv *priv = rx->gve;
	u32 idx;

	if (!rx->dqo.qpl) {
		int err;

		err = gve_alloc_page(priv, &priv->pdev->dev,
				     &buf_state->page_info.page,
				     &buf_state->addr,
				     DMA_FROM_DEVICE, GFP_ATOMIC);
		if (err)
			return err;
	} else {
		idx = rx->dqo.next_qpl_page_idx;
		if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) {
			net_err_ratelimited("%s: Out of QPL pages\n",
					    priv->dev->name);
			return -ENOMEM;
		}
		buf_state->page_info.page = rx->dqo.qpl->pages[idx];
		buf_state->addr = rx->dqo.qpl->page_buses[idx];
		rx->dqo.next_qpl_page_idx++;
	}
	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

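/* Initialize the software state of the buffer and completion queues and
 * thread all buffer states onto the free list.
 */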
static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
				       const u32 buffer_queue_slots,
				       const u32 completion_queue_slots)
{
	int i;

	/* Set buffer queue state */
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.bufq.head = 0;
	rx->dqo.bufq.tail = 0;

	/* Set completion queue state */
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->dqo.complq.cur_gen_bit = 0;
	rx->dqo.complq.head = 0;

	/* Set RX SKB context */
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	/* Set up linked list of buffer IDs */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
			rx->dqo.buf_states[i].next = i + 1;
		rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	}

	rx->dqo.free_buf_states = 0;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;
}

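/* Clear the descriptor rings and queue resources of ring @idx, release any
 * pages still attached to buffer states, and reinitialize the ring state.
 */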
static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	size_t size;
	int i;

	const u32 buffer_queue_slots = priv->rx_desc_cnt;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	/* Reset buffer queue */
	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) *
			buffer_queue_slots;
		memset(rx->dqo.bufq.desc_ring, 0, size);
	}

	/* Reset completion queue */
	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		memset(rx->dqo.complq.desc_ring, 0, size);
	}

	/* Reset q_resources */
	if (rx->q_resources)
		memset(rx->q_resources, 0, sizeof(*rx->q_resources));

	/* Reset buf states */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states; i++) {
			struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

			if (bs->page_info.page)
				gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
		}
	}

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);
}

void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
	gve_rx_reset_ring_dqo(priv, idx);
}

void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			  struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	u32 qpl_id;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
		/* Only free page for RDA. QPL pages are freed in gve_main. */
		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
	}

	if (rx->dqo.qpl) {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
				 const u32 buf_count)
{
	struct device *hdev = &priv->pdev->dev;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
			  struct gve_rx_alloc_rings_cfg *cfg,
			  struct gve_rx_ring *rx,
			  int idx)
{
	struct device *hdev = &priv->pdev->dev;
	int qpl_page_cnt;
	size_t size;
	u32 qpl_id;

	const u32 buffer_queue_slots = cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;

	rx->dqo.num_buf_states = cfg->raw_addressing ?
		min_t(s16, S16_MAX, buffer_queue_slots * 4) :
		gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
			goto err;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (!cfg->raw_addressing) {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);

		rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
							qpl_page_cnt);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

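/* Ring the buffer queue doorbell so the device sees newly posted buffers. */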
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

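/* Post as many buffers as the buffer queue and completion queue can accept,
 * reusing recycled pages where possible and allocating new ones otherwise.
 * The doorbell is rung whenever the tail crosses a multiple of
 * GVE_RX_BUF_THRESH_DQO.
 */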
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);
		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

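/* Decide what to do with a buffer once its current chunk has been handed to
 * an SKB: advance to the next chunk and recycle the page when it is known to
 * be safe, otherwise park the buffer on the used list until all references
 * are dropped.
 */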
static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const u16 data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	rx->dqo.used_buf_states_cnt++;
}

static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	if (rx->ctx.skb_head == napi->skb)
		napi->skb = NULL;
	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

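/* In QPL mode, returns true when so many buffer states are held by
 * outstanding SKBs that incoming data should be copied into freshly
 * allocated pages instead of consuming more QPL buffers.
 */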
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
	    (rx->dqo.num_buf_states -
	     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

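/* Copy the received fragment into a newly allocated page, attach that page
 * to the in-progress SKB, and immediately recycle the original buffer.
 */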
static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multi skbs for single rx packet.
 * Returns 0 if buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	/* Advances buffer page-offset if page is partially used.
	 * Marks buffer as used if page is full.
	 */
	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* Page might have not been used for awhile and was likely last written
	 * by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, -1 otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

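/* NAPI poll: process up to @budget completed packets from the completion
 * queue, build and deliver SKBs, update stats, and repost buffers.
 */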
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}