Lines matching "tx-queues-to-use"
2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
10 * Redistribution and use in source and binary forms, with or
14 * - Redistributions of source code must retain the above
18 * - Redistributions in binary form must reproduce the above
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38 #include <linux/dma-mapping.h>
58 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
59  * It must be a divisor of PAGE_SIZE. If set to 0, FL0 will use sk_buffs
77 * Period of the Tx buffer reclaim timer. This timer does not need to run
78 * frequently as Tx buffers are usually reclaimed by new Tx packets.
88 * Types of Tx queues in each queue set. Order here matters, do not change.
109 struct tx_sw_desc { /* SW state per Tx descriptor */
136 * Holds unmapping information for Tx packets that need deferred unmapping.
137 * This structure lives at skb->head and must be allocated by callers.
145 * Maps a number of flits to the number of Tx descriptors that can hold them.
148 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
150 * HW allows up to 4 descriptors to be combined into a WR.
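/*
 * Illustrative sketch (not part of the driver): the flits-to-descriptors
 * mapping described by the formula above, assuming WR_FLITS is the number
 * of flits one work request descriptor can carry and that anything of two
 * flits or less fits in a single descriptor.  The real code may precompute
 * these values; this only shows the arithmetic.
 */
static inline unsigned int flits_to_desc_sketch(unsigned int flits)
{
	return flits <= 2 ? 1 : 1 + (flits - 2) / (WR_FLITS - 1);
}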
180 * refill_rspq - replenish an SGE response queue
182 * @q: the response queue to replenish
183 * @credits: how many new responses to make available
186 * available to HW.
193 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); in refill_rspq()
197 * need_skb_unmap - does the platform need unmapping of sk_buffs?
212 * unmap_skb - unmap a packet main body and its page fragments
214 * @q: the Tx queue containing Tx descriptors for the packet
215 * @cidx: index of Tx descriptor
220 * to conserve space for metadata, the information necessary to unmap an
221 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
225 * the buffers held in the first Tx descriptor here, and we have enough
226 * information at this point to set the state for the next Tx descriptor.
228 * Note that it is possible to clean up the first descriptor of a packet
237 struct tx_sw_desc *d = &q->sdesc[cidx]; in unmap_skb()
238 int nfrags, frag_idx, curflit, j = d->addr_idx; in unmap_skb()
240 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit]; in unmap_skb()
241 frag_idx = d->fragidx; in unmap_skb()
244 dma_unmap_single(&pdev->dev, be64_to_cpu(sgp->addr[0]), in unmap_skb()
249 curflit = d->sflit + 1 + j; in unmap_skb()
250 nfrags = skb_shinfo(skb)->nr_frags; in unmap_skb()
253 dma_unmap_page(&pdev->dev, be64_to_cpu(sgp->addr[j]), in unmap_skb()
254 skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]), in unmap_skb()
265 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */ in unmap_skb()
266 d = cidx + 1 == q->size ? q->sdesc : d + 1; in unmap_skb()
267 d->fragidx = frag_idx; in unmap_skb()
268 d->addr_idx = j; in unmap_skb()
269 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */ in unmap_skb()
274 * free_tx_desc - reclaims Tx descriptors and their buffers
276 * @q: the Tx queue to reclaim descriptors from
277 * @n: the number of descriptors to reclaim
279 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
280 * Tx buffers. Called with the Tx queue lock held.
286 struct pci_dev *pdev = adapter->pdev; in free_tx_desc()
287 unsigned int cidx = q->cidx; in free_tx_desc()
290 q->cntxt_id >= FW_TUNNEL_SGEEC_START; in free_tx_desc()
292 d = &q->sdesc[cidx]; in free_tx_desc()
293 while (n--) { in free_tx_desc()
294 if (d->skb) { /* an SGL is present */ in free_tx_desc()
296 unmap_skb(d->skb, q, cidx, pdev); in free_tx_desc()
297 if (d->eop) { in free_tx_desc()
298 dev_consume_skb_any(d->skb); in free_tx_desc()
299 d->skb = NULL; in free_tx_desc()
303 if (++cidx == q->size) { in free_tx_desc()
305 d = q->sdesc; in free_tx_desc()
308 q->cidx = cidx; in free_tx_desc()
312 * reclaim_completed_tx - reclaims completed Tx descriptors
314 * @q: the Tx queue to reclaim completed descriptors from
315 * @chunk: maximum number of descriptors to reclaim
317 * Reclaims Tx descriptors that the SGE has indicated it has processed,
318 * and frees the associated buffers if possible. Called with the Tx
325 unsigned int reclaim = q->processed - q->cleaned; in reclaim_completed_tx()
330 q->cleaned += reclaim; in reclaim_completed_tx()
331 q->in_use -= reclaim; in reclaim_completed_tx()
333 return q->processed - q->cleaned; in reclaim_completed_tx()
337 * should_restart_tx - are there enough resources to restart a Tx queue?
338 * @q: the Tx queue
340 * Checks if there are enough descriptors to restart a suspended Tx queue.
344 unsigned int r = q->processed - q->cleaned; in should_restart_tx()
346 return q->in_use - r < (q->size >> 1); in should_restart_tx()
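/*
 * Explanatory note (added): in the check above, (processed - cleaned) is the
 * number of descriptors the HW has finished but the driver has not reclaimed
 * yet, so in_use minus that difference is what is genuinely outstanding; a
 * suspended queue is restarted once less than half the ring is in flight.
 */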
352 if (q->use_pages && d->pg_chunk.page) { in clear_rx_desc()
353 (*d->pg_chunk.p_cnt)--; in clear_rx_desc()
354 if (!*d->pg_chunk.p_cnt) in clear_rx_desc()
355 dma_unmap_page(&pdev->dev, d->pg_chunk.mapping, in clear_rx_desc()
356 q->alloc_size, DMA_FROM_DEVICE); in clear_rx_desc()
358 put_page(d->pg_chunk.page); in clear_rx_desc()
359 d->pg_chunk.page = NULL; in clear_rx_desc()
361 dma_unmap_single(&pdev->dev, dma_unmap_addr(d, dma_addr), in clear_rx_desc()
362 q->buf_size, DMA_FROM_DEVICE); in clear_rx_desc()
363 kfree_skb(d->skb); in clear_rx_desc()
364 d->skb = NULL; in clear_rx_desc()
369 * free_rx_bufs - free the Rx buffers on an SGE free list
371 * @q: the SGE free list to clean up
373 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
378 unsigned int cidx = q->cidx; in free_rx_bufs()
380 while (q->credits--) { in free_rx_bufs()
381 struct rx_sw_desc *d = &q->sdesc[cidx]; in free_rx_bufs()
385 if (++cidx == q->size) in free_rx_bufs()
389 if (q->pg_chunk.page) { in free_rx_bufs()
390 __free_pages(q->pg_chunk.page, q->order); in free_rx_bufs()
391 q->pg_chunk.page = NULL; in free_rx_bufs()
396 * add_one_rx_buf - add a packet buffer to a free-buffer list
399 * @d: the HW Rx descriptor to write
400 * @sd: the SW Rx descriptor to write
404 * Add a buffer of the given length to the supplied HW and SW Rx
413 mapping = dma_map_single(&pdev->dev, va, len, DMA_FROM_DEVICE); in add_one_rx_buf()
414 if (unlikely(dma_mapping_error(&pdev->dev, mapping))) in add_one_rx_buf()
415 return -ENOMEM; in add_one_rx_buf()
419 d->addr_lo = cpu_to_be32(mapping); in add_one_rx_buf()
420 d->addr_hi = cpu_to_be32((u64) mapping >> 32); in add_one_rx_buf()
422 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen)); in add_one_rx_buf()
423 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen)); in add_one_rx_buf()
430 d->addr_lo = cpu_to_be32(mapping); in add_one_rx_chunk()
431 d->addr_hi = cpu_to_be32((u64) mapping >> 32); in add_one_rx_chunk()
433 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen)); in add_one_rx_chunk()
434 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen)); in add_one_rx_chunk()
442 if (!q->pg_chunk.page) { in alloc_pg_chunk()
445 q->pg_chunk.page = alloc_pages(gfp, order); in alloc_pg_chunk()
446 if (unlikely(!q->pg_chunk.page)) in alloc_pg_chunk()
447 return -ENOMEM; in alloc_pg_chunk()
448 q->pg_chunk.va = page_address(q->pg_chunk.page); in alloc_pg_chunk()
449 q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) - in alloc_pg_chunk()
451 q->pg_chunk.offset = 0; in alloc_pg_chunk()
452 mapping = dma_map_page(&adapter->pdev->dev, q->pg_chunk.page, in alloc_pg_chunk()
453 0, q->alloc_size, DMA_FROM_DEVICE); in alloc_pg_chunk()
454 if (unlikely(dma_mapping_error(&adapter->pdev->dev, mapping))) { in alloc_pg_chunk()
455 __free_pages(q->pg_chunk.page, order); in alloc_pg_chunk()
456 q->pg_chunk.page = NULL; in alloc_pg_chunk()
457 return -EIO; in alloc_pg_chunk()
459 q->pg_chunk.mapping = mapping; in alloc_pg_chunk()
461 sd->pg_chunk = q->pg_chunk; in alloc_pg_chunk()
463 prefetch(sd->pg_chunk.p_cnt); in alloc_pg_chunk()
465 q->pg_chunk.offset += q->buf_size; in alloc_pg_chunk()
466 if (q->pg_chunk.offset == (PAGE_SIZE << order)) in alloc_pg_chunk()
467 q->pg_chunk.page = NULL; in alloc_pg_chunk()
469 q->pg_chunk.va += q->buf_size; in alloc_pg_chunk()
470 get_page(q->pg_chunk.page); in alloc_pg_chunk()
473 if (sd->pg_chunk.offset == 0) in alloc_pg_chunk()
474 *sd->pg_chunk.p_cnt = 1; in alloc_pg_chunk()
476 *sd->pg_chunk.p_cnt += 1; in alloc_pg_chunk()
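/*
 * Explanatory note (added): p_cnt is a small reference count stored in the
 * reserved tail of the page chunk itself (the va + (PAGE_SIZE << order) - ...
 * computation above).  Every free-list buffer carved out of the chunk bumps
 * the count, and clear_rx_desc() only dma_unmap_page()s the chunk once the
 * count reaches zero, so a single DMA mapping covers all buffers sharing the
 * page.
 */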
483 if (q->pend_cred >= q->credits / 4) { in ring_fl_db()
484 q->pend_cred = 0; in ring_fl_db()
486 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); in ring_fl_db()
491 * refill_fl - refill an SGE free-buffer list
493 * @q: the free-list to refill
494 * @n: the number of new buffers to allocate
497 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
503 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; in refill_fl()
504 struct rx_desc *d = &q->desc[q->pidx]; in refill_fl()
507 while (n--) { in refill_fl()
511 if (q->use_pages) { in refill_fl()
513 q->order))) { in refill_fl()
514 nomem: q->alloc_failed++; in refill_fl()
517 mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset; in refill_fl()
520 add_one_rx_chunk(mapping, d, q->gen); in refill_fl()
521 dma_sync_single_for_device(&adap->pdev->dev, mapping, in refill_fl()
522 q->buf_size - SGE_PG_RSVD, in refill_fl()
527 struct sk_buff *skb = alloc_skb(q->buf_size, gfp); in refill_fl()
531 sd->skb = skb; in refill_fl()
532 buf_start = skb->data; in refill_fl()
533 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, in refill_fl()
534 q->gen, adap->pdev); in refill_fl()
536 clear_rx_desc(adap->pdev, q, sd); in refill_fl()
543 if (++q->pidx == q->size) { in refill_fl()
544 q->pidx = 0; in refill_fl()
545 q->gen ^= 1; in refill_fl()
546 sd = q->sdesc; in refill_fl()
547 d = q->desc; in refill_fl()
552 q->credits += count; in refill_fl()
553 q->pend_cred += count; in refill_fl()
561 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits), in __refill_fl()
566 * recycle_rx_buf - recycle a receive buffer
569 * @idx: index of buffer to recycle
577 struct rx_desc *from = &q->desc[idx]; in recycle_rx_buf()
578 struct rx_desc *to = &q->desc[q->pidx]; in recycle_rx_buf() local
580 q->sdesc[q->pidx] = q->sdesc[idx]; in recycle_rx_buf()
581 to->addr_lo = from->addr_lo; /* already big endian */ in recycle_rx_buf()
582 to->addr_hi = from->addr_hi; /* likewise */ in recycle_rx_buf()
584 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen)); in recycle_rx_buf()
585 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen)); in recycle_rx_buf()
587 if (++q->pidx == q->size) { in recycle_rx_buf()
588 q->pidx = 0; in recycle_rx_buf()
589 q->gen ^= 1; in recycle_rx_buf()
592 q->credits++; in recycle_rx_buf()
593 q->pend_cred++; in recycle_rx_buf()
598 * alloc_ring - allocate resources for an SGE descriptor ring
606 * Allocates resources for an SGE descriptor ring, such as Tx queues,
607 * free buffer lists, or response queues. Each SGE ring requires
619 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL); in alloc_ring()
627 dma_free_coherent(&pdev->dev, len, p, *phys); in alloc_ring()
636 * t3_reset_qset - reset a sge qset
645 if (q->adap && in t3_reset_qset()
646 !(q->adap->flags & NAPI_INIT)) { in t3_reset_qset()
651 q->adap = NULL; in t3_reset_qset()
652 memset(&q->rspq, 0, sizeof(q->rspq)); in t3_reset_qset()
653 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET); in t3_reset_qset()
654 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET); in t3_reset_qset()
655 q->txq_stopped = 0; in t3_reset_qset()
656 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ in t3_reset_qset()
657 q->rx_reclaim_timer.function = NULL; in t3_reset_qset()
658 q->nomem = 0; in t3_reset_qset()
659 napi_free_frags(&q->napi); in t3_reset_qset()
664 * t3_free_qset - free the resources of an SGE queue set
669 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
670 * queue set must be quiesced prior to calling this.
675 struct pci_dev *pdev = adapter->pdev; in t3_free_qset()
678 if (q->fl[i].desc) { in t3_free_qset()
679 spin_lock_irq(&adapter->sge.reg_lock); in t3_free_qset()
680 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id); in t3_free_qset()
681 spin_unlock_irq(&adapter->sge.reg_lock); in t3_free_qset()
682 free_rx_bufs(pdev, &q->fl[i]); in t3_free_qset()
683 kfree(q->fl[i].sdesc); in t3_free_qset()
684 dma_free_coherent(&pdev->dev, in t3_free_qset()
685 q->fl[i].size * in t3_free_qset()
686 sizeof(struct rx_desc), q->fl[i].desc, in t3_free_qset()
687 q->fl[i].phys_addr); in t3_free_qset()
691 if (q->txq[i].desc) { in t3_free_qset()
692 spin_lock_irq(&adapter->sge.reg_lock); in t3_free_qset()
693 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0); in t3_free_qset()
694 spin_unlock_irq(&adapter->sge.reg_lock); in t3_free_qset()
695 if (q->txq[i].sdesc) { in t3_free_qset()
696 free_tx_desc(adapter, &q->txq[i], in t3_free_qset()
697 q->txq[i].in_use); in t3_free_qset()
698 kfree(q->txq[i].sdesc); in t3_free_qset()
700 dma_free_coherent(&pdev->dev, in t3_free_qset()
701 q->txq[i].size * in t3_free_qset()
703 q->txq[i].desc, q->txq[i].phys_addr); in t3_free_qset()
704 __skb_queue_purge(&q->txq[i].sendq); in t3_free_qset()
707 if (q->rspq.desc) { in t3_free_qset()
708 spin_lock_irq(&adapter->sge.reg_lock); in t3_free_qset()
709 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id); in t3_free_qset()
710 spin_unlock_irq(&adapter->sge.reg_lock); in t3_free_qset()
711 dma_free_coherent(&pdev->dev, in t3_free_qset()
712 q->rspq.size * sizeof(struct rsp_desc), in t3_free_qset()
713 q->rspq.desc, q->rspq.phys_addr); in t3_free_qset()
720 * init_qset_cntxt - initialize an SGE queue set context info
724 * Initializes the TIDs and context ids for the queues of a queue set.
728 qs->rspq.cntxt_id = id; in init_qset_cntxt()
729 qs->fl[0].cntxt_id = 2 * id; in init_qset_cntxt()
730 qs->fl[1].cntxt_id = 2 * id + 1; in init_qset_cntxt()
731 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; in init_qset_cntxt()
732 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; in init_qset_cntxt()
733 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; in init_qset_cntxt()
734 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; in init_qset_cntxt()
735 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; in init_qset_cntxt()
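/*
 * Worked example (illustration only): for queue set id 2 the assignments
 * above yield response queue context 2, free-list contexts 4 and 5, and
 * egress contexts FW_TUNNEL_SGEEC_START + 2, FW_OFLD_SGEEC_START + 2 and
 * FW_CTRL_SGEEC_START + 2 for the Ethernet, offload and control Tx queues.
 */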
739 * sgl_len - calculates the size of an SGL of the given capacity
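/*
 * Sketch of the size calculation implied above (an assumption, since the
 * function body is not part of this excerpt): each struct sg_ent packs two
 * address/length pairs into three 8-byte flits, so N buffers take
 * 3 * (N / 2) flits plus two more for a half-used trailing entry when N is
 * odd.
 */
static inline unsigned int sgl_len_sketch(unsigned int n)
{
	return (3 * n) / 2 + (n & 1);
}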
752 * flits_to_desc - returns the num of Tx descriptors for the given flits
755 * Calculates the number of Tx descriptors needed for the supplied number
765 * get_packet - return the next ingress packet buffer from a free list
773 * original buffer, otherwise we use the original buffer itself. If a
776 * threshold and the packet is too big to copy, or (b) the packet should
783 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; in get_packet()
785 prefetch(sd->skb->data); in get_packet()
786 fl->credits--; in get_packet()
792 dma_sync_single_for_cpu(&adap->pdev->dev, in get_packet()
795 memcpy(skb->data, sd->skb->data, len); in get_packet()
796 dma_sync_single_for_device(&adap->pdev->dev, in get_packet()
802 recycle_rx_buf(adap, fl, fl->cidx); in get_packet()
806 if (unlikely(fl->credits < drop_thres) && in get_packet()
807 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1), in get_packet()
812 dma_unmap_single(&adap->pdev->dev, dma_unmap_addr(sd, dma_addr), in get_packet()
813 fl->buf_size, DMA_FROM_DEVICE); in get_packet()
814 skb = sd->skb; in get_packet()
821 * get_packet_pg - return the next ingress packet buffer from a free list
830 * otherwise we attach the original buffer as a page fragment to a fresh
833 * under the threshold and the packet is too big to copy, or (b) there's
836 * Note: this function is similar to @get_packet but deals with Rx buffers
844 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; in get_packet_pg()
848 newskb = skb = q->pg_skb; in get_packet_pg()
853 dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr, in get_packet_pg()
855 memcpy(newskb->data, sd->pg_chunk.va, len); in get_packet_pg()
856 dma_sync_single_for_device(&adap->pdev->dev, dma_addr, in get_packet_pg()
861 fl->credits--; in get_packet_pg()
862 recycle_rx_buf(adap, fl, fl->cidx); in get_packet_pg()
863 q->rx_recycle_buf++; in get_packet_pg()
867 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres))) in get_packet_pg()
870 prefetch(sd->pg_chunk.p_cnt); in get_packet_pg()
881 dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr, len, in get_packet_pg()
883 (*sd->pg_chunk.p_cnt)--; in get_packet_pg()
884 if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page) in get_packet_pg()
885 dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping, in get_packet_pg()
886 fl->alloc_size, DMA_FROM_DEVICE); in get_packet_pg()
889 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN); in get_packet_pg()
890 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page, in get_packet_pg()
891 sd->pg_chunk.offset + SGE_RX_PULL_LEN, in get_packet_pg()
892 len - SGE_RX_PULL_LEN); in get_packet_pg()
893 newskb->len = len; in get_packet_pg()
894 newskb->data_len = len - SGE_RX_PULL_LEN; in get_packet_pg()
895 newskb->truesize += newskb->data_len; in get_packet_pg()
897 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags, in get_packet_pg()
898 sd->pg_chunk.page, in get_packet_pg()
899 sd->pg_chunk.offset, len); in get_packet_pg()
900 newskb->len += len; in get_packet_pg()
901 newskb->data_len += len; in get_packet_pg()
902 newskb->truesize += len; in get_packet_pg()
905 fl->credits--; in get_packet_pg()
907 * We do not refill FLs here, we let the caller do it to overlap a in get_packet_pg()
914 * get_imm_packet - return the next ingress packet buffer from a response
925 BUILD_BUG_ON(IMMED_PKT_SIZE != sizeof(resp->immediate)); in get_imm_packet()
926 skb_copy_to_linear_data(skb, &resp->immediate, IMMED_PKT_SIZE); in get_imm_packet()
932 * calc_tx_descs - calculate the number of Tx descriptors for a packet
935 * Returns the number of Tx descriptors needed for the given Ethernet
942 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt)) in calc_tx_descs()
945 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2; in calc_tx_descs()
946 if (skb_shinfo(skb)->gso_size) in calc_tx_descs()
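/*
 * Hedged reconstruction of how the calc_tx_descs() fragments above fit
 * together; the extra flit for the LSO information in the GSO case and the
 * final flits_to_desc() call are assumptions not shown in this excerpt.
 */
static unsigned int calc_tx_descs_sketch(const struct sk_buff *skb)
{
	unsigned int flits;

	if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
		return 1;	/* fits as immediate data in one descriptor */

	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
	if (skb_shinfo(skb)->gso_size)
		flits++;	/* room for the LSO header information */
	return flits_to_desc(flits);
}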
951 /* map_skb - map a packet main body and its page fragments
954 * @addr: placeholder to save the mapped addresses
965 *addr = dma_map_single(&pdev->dev, skb->data, in map_skb()
967 if (dma_mapping_error(&pdev->dev, *addr)) in map_skb()
973 end = &si->frags[si->nr_frags]; in map_skb()
975 for (fp = si->frags; fp < end; fp++) { in map_skb()
976 *addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp), in map_skb()
978 if (dma_mapping_error(&pdev->dev, *addr)) in map_skb()
985 while (fp-- > si->frags) in map_skb()
986 dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp), in map_skb()
989 dma_unmap_single(&pdev->dev, addr[-1], skb_headlen(skb), in map_skb()
992 return -ENOMEM; in map_skb()
996 * write_sgl - populate a scatter/gather list for a packet
998 * @sgp: the SGL to populate
999 * @start: start address of skb main body data to include in the SGL
1000 * @len: length of skb main body data to include in the SGL
1004 * and returns the SGL size in 8-byte words. The caller must size the SGL
1014 sgp->len[0] = cpu_to_be32(len); in write_sgl()
1015 sgp->addr[j++] = cpu_to_be64(addr[k++]); in write_sgl()
1018 nfrags = skb_shinfo(skb)->nr_frags; in write_sgl()
1020 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; in write_sgl()
1022 sgp->len[j] = cpu_to_be32(skb_frag_size(frag)); in write_sgl()
1023 sgp->addr[j] = cpu_to_be64(addr[k++]); in write_sgl()
1029 sgp->len[j] = 0; in write_sgl()
1034 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1036 * @q: the Tx queue
1038  *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
1039  *	where the HW may go to sleep just after we checked; in that case
1040  *	the interrupt handler will detect the outstanding TX packet
1048 clear_bit(TXQ_LAST_PKT_DB, &q->flags); in check_ring_tx_db()
1049 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { in check_ring_tx_db()
1050 set_bit(TXQ_LAST_PKT_DB, &q->flags); in check_ring_tx_db()
1052 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); in check_ring_tx_db()
1057 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); in check_ring_tx_db()
1064 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen); in wr_gen2()
1069 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1070 * @ndesc: number of Tx descriptors spanned by the SGL
1071 * @skb: the packet corresponding to the WR
1072 * @d: first Tx descriptor to be written
1074 * @q: the SGE Tx queue
1076 * @flits: number of flits to the start of the SGL in the first descriptor
1078 * @gen: the Tx descriptor generation
1083  *	small enough to fit into one Tx descriptor, it has already been written
1084 * and we just need to write the WR header. Otherwise we distribute the
1096 struct tx_sw_desc *sd = &q->sdesc[pidx]; in write_wr_hdr_sgl()
1098 sd->skb = skb; in write_wr_hdr_sgl()
1100 sd->fragidx = 0; in write_wr_hdr_sgl()
1101 sd->addr_idx = 0; in write_wr_hdr_sgl()
1102 sd->sflit = flits; in write_wr_hdr_sgl()
1106 sd->eop = 1; in write_wr_hdr_sgl()
1107 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | in write_wr_hdr_sgl()
1110 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | in write_wr_hdr_sgl()
1118 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | in write_wr_hdr_sgl()
1122 unsigned int avail = WR_FLITS - flits; in write_wr_hdr_sgl()
1126 memcpy(&d->flit[flits], fp, avail * sizeof(*fp)); in write_wr_hdr_sgl()
1127 sgl_flits -= avail; in write_wr_hdr_sgl()
1128 ndesc--; in write_wr_hdr_sgl()
1134 sd->eop = 0; in write_wr_hdr_sgl()
1136 if (++pidx == q->size) { in write_wr_hdr_sgl()
1139 d = q->desc; in write_wr_hdr_sgl()
1140 sd = q->sdesc; in write_wr_hdr_sgl()
1143 sd->skb = skb; in write_wr_hdr_sgl()
1145 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | in write_wr_hdr_sgl()
1147 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, in write_wr_hdr_sgl()
1153 sd->eop = 1; in write_wr_hdr_sgl()
1154 wrp->wr_hi |= htonl(F_WR_EOP); in write_wr_hdr_sgl()
1156 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; in write_wr_hdr_sgl()
1163 * write_tx_pkt_wr - write a TX_PKT work request
1165 * @skb: the packet to send
1167 * @pidx: index of the first Tx descriptor to write
1168 * @gen: the generation value to use
1169 * @q: the Tx queue
1171 * @compl: the value of the COMPL bit to use
1174 * Generate a TX_PKT work request to send the supplied packet.
1184 struct tx_desc *d = &q->desc[pidx]; in write_tx_pkt_wr()
1187 cpl->len = htonl(skb->len); in write_tx_pkt_wr()
1188 cntrl = V_TXPKT_INTF(pi->port_id); in write_tx_pkt_wr()
1193 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size); in write_tx_pkt_wr()
1198 d->flit[2] = 0; in write_tx_pkt_wr()
1200 hdr->cntrl = htonl(cntrl); in write_tx_pkt_wr()
1204 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) | in write_tx_pkt_wr()
1205 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff); in write_tx_pkt_wr()
1206 hdr->lso_info = htonl(tso_info); in write_tx_pkt_wr()
1211 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL); in write_tx_pkt_wr()
1212 cpl->cntrl = htonl(cntrl); in write_tx_pkt_wr()
1214 if (skb->len <= WR_LEN - sizeof(*cpl)) { in write_tx_pkt_wr()
1215 q->sdesc[pidx].skb = NULL; in write_tx_pkt_wr()
1216 if (!skb->data_len) in write_tx_pkt_wr()
1217 skb_copy_from_linear_data(skb, &d->flit[2], in write_tx_pkt_wr()
1218 skb->len); in write_tx_pkt_wr()
1220 skb_copy_bits(skb, 0, &d->flit[2], skb->len); in write_tx_pkt_wr()
1222 flits = (skb->len + 7) / 8 + 2; in write_tx_pkt_wr()
1223 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) | in write_tx_pkt_wr()
1227 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) | in write_tx_pkt_wr()
1228 V_WR_TID(q->token)); in write_tx_pkt_wr()
1237 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; in write_tx_pkt_wr()
1238 sgl_flits = write_sgl(skb, sgp, skb->data, skb_headlen(skb), addr); in write_tx_pkt_wr()
1242 htonl(V_WR_TID(q->token))); in write_tx_pkt_wr()
1249 set_bit(TXQ_ETH, &qs->txq_stopped); in t3_stop_tx_queue()
1250 q->stops++; in t3_stop_tx_queue()
1254 * t3_eth_xmit - add a packet to the Ethernet Tx queue
1258 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1265 struct adapter *adap = pi->adapter; in t3_eth_xmit()
1275 if (unlikely(skb->len < ETH_HLEN)) { in t3_eth_xmit()
1281 qs = &pi->qs[qidx]; in t3_eth_xmit()
1282 q = &qs->txq[TXQ_ETH]; in t3_eth_xmit()
1287 credits = q->size - q->in_use; in t3_eth_xmit()
1292 dev_err(&adap->pdev->dev, in t3_eth_xmit()
1293 "%s: Tx ring %u full while queue awake!\n", in t3_eth_xmit()
1294 dev->name, q->cntxt_id & 7); in t3_eth_xmit()
1299 if (skb->len > (WR_LEN - sizeof(struct cpl_tx_pkt))) { in t3_eth_xmit()
1300 if (unlikely(map_skb(adap->pdev, skb, addr) < 0)) { in t3_eth_xmit()
1306 q->in_use += ndesc; in t3_eth_xmit()
1307 if (unlikely(credits - ndesc < q->stop_thres)) { in t3_eth_xmit()
1311 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) { in t3_eth_xmit()
1312 q->restarts++; in t3_eth_xmit()
1317 gen = q->gen; in t3_eth_xmit()
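	/*
	 * Explanatory note (added): the next three statements request a Tx
	 * completion from the HW roughly once every 8 descriptors.  'unacked'
	 * accumulates descriptor counts; once it reaches 8, bit 3 lines up
	 * with the WR COMPL bit after the shift and the counter keeps only
	 * the remainder.
	 */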
1318 q->unacked += ndesc; in t3_eth_xmit()
1319 compl = (q->unacked & 8) << (S_WR_COMPL - 3); in t3_eth_xmit()
1320 q->unacked &= 7; in t3_eth_xmit()
1321 pidx = q->pidx; in t3_eth_xmit()
1322 q->pidx += ndesc; in t3_eth_xmit()
1323 if (q->pidx >= q->size) { in t3_eth_xmit()
1324 q->pidx -= q->size; in t3_eth_xmit()
1325 q->gen ^= 1; in t3_eth_xmit()
1329 if (skb->ip_summed == CHECKSUM_PARTIAL) in t3_eth_xmit()
1330 qs->port_stats[SGE_PSTAT_TX_CSUM]++; in t3_eth_xmit()
1331 if (skb_shinfo(skb)->gso_size) in t3_eth_xmit()
1332 qs->port_stats[SGE_PSTAT_TSO]++; in t3_eth_xmit()
1334 qs->port_stats[SGE_PSTAT_VLANINS]++; in t3_eth_xmit()
1337 * We do not use Tx completion interrupts to free DMAd Tx packets. in t3_eth_xmit()
1338 * This is good for performance but means that we rely on new Tx in t3_eth_xmit()
1339 * packets arriving to run the destructors of completed packets, in t3_eth_xmit()
1340 * which open up space in their sockets' send queues. Sometimes in t3_eth_xmit()
1341 * we do not get such new packets causing Tx to stall. A single in t3_eth_xmit()
1344 * but it doesn't run often enough (nor do we want it to) to prevent in t3_eth_xmit()
1345 * lengthy stalls. A solution to this problem is to run the in t3_eth_xmit()
1347  	 * A drawback is that we lie to socket memory accounting, but the amount  in t3_eth_xmit()
1348 * of extra memory is reasonable (limited by the number of Tx in t3_eth_xmit()
1351 * acks to really free up the data the extra memory is even less. in t3_eth_xmit()
1354 * good thing. We also run them without holding our Tx queue lock, in t3_eth_xmit()
1358 * to make sure it doesn't complete and get freed prematurely. in t3_eth_xmit()
1369 * write_imm - write a packet into a Tx descriptor as immediate data
1370 * @d: the Tx descriptor to write
1372 * @len: the length of packet data to write as immediate data
1373 * @gen: the generation bit value to write
1375 * Writes a packet as immediate data into a Tx descriptor. The packet
1383 struct work_request_hdr *from = (struct work_request_hdr *)skb->data; in write_imm()
1384 struct work_request_hdr *to = (struct work_request_hdr *)d; in write_imm() local
1386 if (likely(!skb->data_len)) in write_imm()
1387 memcpy(&to[1], &from[1], len - sizeof(*from)); in write_imm()
1389 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from)); in write_imm()
1391 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | in write_imm()
1394 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | in write_imm()
1401 * check_desc_avail - check descriptor availability on a send queue
1405 * @ndesc: the number of Tx descriptors needed
1406 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1408 * Checks if the requested number of Tx descriptors is available on an
1411 * Must be called with the Tx queue locked.
1415 * needs to retry because there weren't enough descriptors at the
1422 if (unlikely(!skb_queue_empty(&q->sendq))) { in check_desc_avail()
1423 addq_exit:__skb_queue_tail(&q->sendq, skb); in check_desc_avail()
1426 if (unlikely(q->size - q->in_use < ndesc)) { in check_desc_avail()
1429 set_bit(qid, &qs->txq_stopped); in check_desc_avail()
1433 test_and_clear_bit(qid, &qs->txq_stopped)) in check_desc_avail()
1436 q->stops++; in check_desc_avail()
1443 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1444 * @q: the SGE control Tx queue
1446 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1447 * that send only immediate data (presently just the control queues) and
1448 * thus do not have any sk_buffs to release.
1452 unsigned int reclaim = q->processed - q->cleaned; in reclaim_completed_tx_imm()
1454 q->in_use -= reclaim; in reclaim_completed_tx_imm()
1455 q->cleaned += reclaim; in reclaim_completed_tx_imm()
1460 return skb->len <= WR_LEN; in immediate()
1464 * ctrl_xmit - send a packet through an SGE control Tx queue
1469 * Send a packet through an SGE control Tx queue. Packets sent through
1470 * a control queue must fit entirely as immediate data in a single Tx
1477 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data; in ctrl_xmit()
1485 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP); in ctrl_xmit()
1486 wrp->wr_lo = htonl(V_WR_TID(q->token)); in ctrl_xmit()
1488 spin_lock(&q->lock); in ctrl_xmit()
1494 spin_unlock(&q->lock); in ctrl_xmit()
1500 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen); in ctrl_xmit()
1502 q->in_use++; in ctrl_xmit()
1503 if (++q->pidx >= q->size) { in ctrl_xmit()
1504 q->pidx = 0; in ctrl_xmit()
1505 q->gen ^= 1; in ctrl_xmit()
1507 spin_unlock(&q->lock); in ctrl_xmit()
1510 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); in ctrl_xmit()
1515 * restart_ctrlq - restart a suspended control queue
1516 * @w: pointer to the work associated with this handler
1518 * Resumes transmission on a suspended Tx control queue.
1525 struct sge_txq *q = &qs->txq[TXQ_CTRL]; in restart_ctrlq()
1527 spin_lock(&q->lock); in restart_ctrlq()
1530 while (q->in_use < q->size && in restart_ctrlq()
1531 (skb = __skb_dequeue(&q->sendq)) != NULL) { in restart_ctrlq()
1533 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen); in restart_ctrlq()
1535 if (++q->pidx >= q->size) { in restart_ctrlq()
1536 q->pidx = 0; in restart_ctrlq()
1537 q->gen ^= 1; in restart_ctrlq()
1539 q->in_use++; in restart_ctrlq()
1542 if (!skb_queue_empty(&q->sendq)) { in restart_ctrlq()
1543 set_bit(TXQ_CTRL, &qs->txq_stopped); in restart_ctrlq()
1547 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) in restart_ctrlq()
1549 q->stops++; in restart_ctrlq()
1552 spin_unlock(&q->lock); in restart_ctrlq()
1554 t3_write_reg(qs->adap, A_SG_KDOORBELL, in restart_ctrlq()
1555 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); in restart_ctrlq()
1565 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb); in t3_mgmt_tx()
1572 * deferred_unmap_destructor - unmap a packet when it is freed
1575 * This is the packet destructor used for Tx packets that need to remain
1576 * mapped until they are freed rather than until their Tx descriptors are
1586 dui = (struct deferred_unmap_info *)skb->head; in deferred_unmap_destructor()
1587 p = dui->addr; in deferred_unmap_destructor()
1589 if (skb_tail_pointer(skb) - skb_transport_header(skb)) in deferred_unmap_destructor()
1590 dma_unmap_single(&dui->pdev->dev, *p++, in deferred_unmap_destructor()
1591 skb_tail_pointer(skb) - skb_transport_header(skb), in deferred_unmap_destructor()
1595 for (i = 0; i < si->nr_frags; i++) in deferred_unmap_destructor()
1596 dma_unmap_page(&dui->pdev->dev, *p++, in deferred_unmap_destructor()
1597 skb_frag_size(&si->frags[i]), DMA_TO_DEVICE); in deferred_unmap_destructor()
1606 dui = (struct deferred_unmap_info *)skb->head; in setup_deferred_unmapping()
1607 dui->pdev = pdev; in setup_deferred_unmapping()
1608 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) { in setup_deferred_unmapping()
1609 *p++ = be64_to_cpu(sgl->addr[0]); in setup_deferred_unmapping()
1610 *p++ = be64_to_cpu(sgl->addr[1]); in setup_deferred_unmapping()
1613 *p = be64_to_cpu(sgl->addr[0]); in setup_deferred_unmapping()
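/*
 * Explanatory note (added): each sg_ent spans three flits and holds up to
 * two DMA addresses, which is why the loop above walks the SGL three flits
 * at a time copying two addresses per step, with any trailing half-entry
 * contributing a single address.
 */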
1617 * write_ofld_wr - write an offload work request
1619 * @skb: the packet to send
1620 * @q: the Tx queue
1621 * @pidx: index of the first Tx descriptor to write
1622 * @gen: the generation value to use
1626 * Write an offload work request to send the supplied packet. The packet
1637 struct tx_desc *d = &q->desc[pidx]; in write_ofld_wr()
1640 q->sdesc[pidx].skb = NULL; in write_ofld_wr()
1641 write_imm(d, skb, skb->len, gen); in write_ofld_wr()
1647 from = (struct work_request_hdr *)skb->data; in write_ofld_wr()
1648 memcpy(&d->flit[1], &from[1], in write_ofld_wr()
1649 skb_transport_offset(skb) - sizeof(*from)); in write_ofld_wr()
1652 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; in write_ofld_wr()
1654 skb_tail_pointer(skb) - skb_transport_header(skb), in write_ofld_wr()
1657 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits); in write_ofld_wr()
1658 skb->destructor = deferred_unmap_destructor; in write_ofld_wr()
1662 gen, from->wr_hi, from->wr_lo); in write_ofld_wr()
1666 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1669 * Returns the number of Tx descriptors needed for the given offload
1676 if (skb->len <= WR_LEN) in calc_tx_descs_ofld()
1680 cnt = skb_shinfo(skb)->nr_frags; in calc_tx_descs_ofld()
1687 * ofld_xmit - send a packet through an offload queue
1689 * @q: the Tx offload queue
1700 spin_lock(&q->lock); in ofld_xmit()
1706 skb->priority = ndesc; /* save for restart */ in ofld_xmit()
1707 spin_unlock(&q->lock); in ofld_xmit()
1714 map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) { in ofld_xmit()
1715 spin_unlock(&q->lock); in ofld_xmit()
1719 gen = q->gen; in ofld_xmit()
1720 q->in_use += ndesc; in ofld_xmit()
1721 pidx = q->pidx; in ofld_xmit()
1722 q->pidx += ndesc; in ofld_xmit()
1723 if (q->pidx >= q->size) { in ofld_xmit()
1724 q->pidx -= q->size; in ofld_xmit()
1725 q->gen ^= 1; in ofld_xmit()
1727 spin_unlock(&q->lock); in ofld_xmit()
1729 write_ofld_wr(adap, skb, q, pidx, gen, ndesc, (dma_addr_t *)skb->head); in ofld_xmit()
1735 * restart_offloadq - restart a suspended offload queue
1736 * @w: pointer to the work associated with this handler
1738 * Resumes transmission on a suspended Tx offload queue.
1745 struct sge_txq *q = &qs->txq[TXQ_OFLD]; in restart_offloadq()
1746 const struct port_info *pi = netdev_priv(qs->netdev); in restart_offloadq()
1747 struct adapter *adap = pi->adapter; in restart_offloadq()
1750 spin_lock(&q->lock); in restart_offloadq()
1753 while ((skb = skb_peek(&q->sendq)) != NULL) { in restart_offloadq()
1755 unsigned int ndesc = skb->priority; in restart_offloadq()
1757 if (unlikely(q->size - q->in_use < ndesc)) { in restart_offloadq()
1758 set_bit(TXQ_OFLD, &qs->txq_stopped); in restart_offloadq()
1762 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) in restart_offloadq()
1764 q->stops++; in restart_offloadq()
1769 map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) in restart_offloadq()
1772 gen = q->gen; in restart_offloadq()
1773 q->in_use += ndesc; in restart_offloadq()
1774 pidx = q->pidx; in restart_offloadq()
1775 q->pidx += ndesc; in restart_offloadq()
1777 if (q->pidx >= q->size) { in restart_offloadq()
1778 q->pidx -= q->size; in restart_offloadq()
1779 q->gen ^= 1; in restart_offloadq()
1781 __skb_unlink(skb, &q->sendq); in restart_offloadq()
1782 spin_unlock(&q->lock); in restart_offloadq()
1785 (dma_addr_t *)skb->head); in restart_offloadq()
1786 spin_lock(&q->lock); in restart_offloadq()
1788 spin_unlock(&q->lock); in restart_offloadq()
1791 set_bit(TXQ_RUNNING, &q->flags); in restart_offloadq()
1792 set_bit(TXQ_LAST_PKT_DB, &q->flags); in restart_offloadq()
1797 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); in restart_offloadq()
1801 * queue_set - return the queue set a packet should use
1804 * Maps a packet to the SGE queue set it should use. The desired queue
1805 * set is carried in bits 1-3 in the packet's priority.
1809 return skb->priority >> 1; in queue_set()
1813 * is_ctrl_pkt - return whether an offload packet is a control packet
1816 * Determines whether an offload packet should use an OFLD or a CTRL
1817 * Tx queue. This is indicated by bit 0 in the packet's priority.
1821 return skb->priority & 1; in is_ctrl_pkt()
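/*
 * Illustration (assumption, not driver code): a sender consistent with
 * queue_set() and is_ctrl_pkt() above would encode an offload packet's
 * destination as
 *
 *	skb->priority = (qset_idx << 1) | is_ctrl;
 *
 * i.e. bit 0 picks the CTRL vs. OFLD Tx queue and bits 1-3 pick the queue
 * set.
 */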
1825 * t3_offload_tx - send an offload packet
1826 * @tdev: the offload device to send to
1829 * Sends an offload packet. We use the packet priority to select the
1830 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1831 * should be sent as regular or control, bits 1-3 select the queue set.
1836 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)]; in t3_offload_tx()
1839 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb); in t3_offload_tx()
1841 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb); in t3_offload_tx()
1845 * offload_enqueue - add an offload packet to an SGE offload receive queue
1849 * Add a new offload packet to an SGE response queue's offload packet
1851 * softirq to process the queue.
1855 int was_empty = skb_queue_empty(&q->rx_queue); in offload_enqueue()
1857 __skb_queue_tail(&q->rx_queue, skb); in offload_enqueue()
1862 napi_schedule(&qs->napi); in offload_enqueue()
1867 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1873 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1880 q->offload_bundles++; in deliver_partial_bundle()
1881 tdev->recv(tdev, skbs, n); in deliver_partial_bundle()
1886 * ofld_poll - NAPI handler for offload packets in interrupt mode
1891 * by the hard interrupt handler, i.e., when it's operating in non-polling
1893 * receive handler. Batches need to be of modest size as we do prefetches
1899 struct sge_rspq *q = &qs->rspq; in ofld_poll()
1900 struct adapter *adapter = qs->adap; in ofld_poll()
1908 spin_lock_irq(&q->lock); in ofld_poll()
1910 skb_queue_splice_init(&q->rx_queue, &queue); in ofld_poll()
1913 spin_unlock_irq(&q->lock); in ofld_poll()
1916 spin_unlock_irq(&q->lock); in ofld_poll()
1925 prefetch(skb->data); in ofld_poll()
1928 q->offload_bundles++; in ofld_poll()
1929 adapter->tdev.recv(&adapter->tdev, skbs, in ofld_poll()
1936 spin_lock_irq(&q->lock); in ofld_poll()
1937 skb_queue_splice(&queue, &q->rx_queue); in ofld_poll()
1938 spin_unlock_irq(&q->lock); in ofld_poll()
1940 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered); in ofld_poll()
1947 * rx_offload - process a received offload packet
1954 * Process an ingress offload packet and add it to the offload ingress
1965 if (rq->polling) { in rx_offload()
1968 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE); in rx_offload()
1970 rq->offload_bundles++; in rx_offload()
1979 * restart_tx - check whether to restart suspended Tx queues
1980 * @qs: the queue set to resume
1982 * Restarts suspended Tx queues of an SGE queue set if they have enough
1983 * free resources to resume operation.
1987 if (test_bit(TXQ_ETH, &qs->txq_stopped) && in restart_tx()
1988 should_restart_tx(&qs->txq[TXQ_ETH]) && in restart_tx()
1989 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) { in restart_tx()
1990 qs->txq[TXQ_ETH].restarts++; in restart_tx()
1991 if (netif_running(qs->netdev)) in restart_tx()
1992 netif_tx_wake_queue(qs->tx_q); in restart_tx()
1995 if (test_bit(TXQ_OFLD, &qs->txq_stopped) && in restart_tx()
1996 should_restart_tx(&qs->txq[TXQ_OFLD]) && in restart_tx()
1997 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { in restart_tx()
1998 qs->txq[TXQ_OFLD].restarts++; in restart_tx()
2000 /* The work can be quite lengthy so we use driver's own queue */ in restart_tx()
2001 queue_work(cxgb3_wq, &qs->txq[TXQ_OFLD].qresume_task); in restart_tx()
2003 if (test_bit(TXQ_CTRL, &qs->txq_stopped) && in restart_tx()
2004 should_restart_tx(&qs->txq[TXQ_CTRL]) && in restart_tx()
2005 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { in restart_tx()
2006 qs->txq[TXQ_CTRL].restarts++; in restart_tx()
2008 /* The work can be quite lengthy so we use driver's own queue */ in restart_tx()
2009 queue_work(cxgb3_wq, &qs->txq[TXQ_CTRL].qresume_task); in restart_tx()
2014 * cxgb3_arp_process - process an ARP request probing a private IP address
2019 * dedicated to iSCSI, generate an ARP reply if so.
2023 struct net_device *dev = skb->dev; in cxgb3_arp_process()
2035 if (arp->ar_op != htons(ARPOP_REQUEST)) in cxgb3_arp_process()
2040 arp_ptr += dev->addr_len; in cxgb3_arp_process()
2043 arp_ptr += dev->addr_len; in cxgb3_arp_process()
2046 if (tip != pi->iscsi_ipv4addr) in cxgb3_arp_process()
2050 pi->iscsic.mac_addr, sha); in cxgb3_arp_process()
2056 return skb->protocol == htons(ETH_P_ARP); in is_arp()
2067 if (pi->iscsic.recv) in cxgb3_process_iscsi_prov_pack()
2068 pi->iscsic.recv(pi, skb); in cxgb3_process_iscsi_prov_pack()
2073 * rx_eth - process an ingress ethernet packet
2080 * Process an ingress ethernet packet and deliver it to the stack.
2087 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad); in rx_eth()
2092 skb->protocol = eth_type_trans(skb, adap->port[p->iff]); in rx_eth()
2093 pi = netdev_priv(skb->dev); in rx_eth()
2094 if ((skb->dev->features & NETIF_F_RXCSUM) && p->csum_valid && in rx_eth()
2095 p->csum == htons(0xffff) && !p->fragment) { in rx_eth()
2096 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; in rx_eth()
2097 skb->ip_summed = CHECKSUM_UNNECESSARY; in rx_eth()
2100 skb_record_rx_queue(skb, qs - &adap->sge.qs[pi->first_qset]); in rx_eth()
2102 if (p->vlan_valid) { in rx_eth()
2103 qs->port_stats[SGE_PSTAT_VLANEX]++; in rx_eth()
2104 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(p->vlan)); in rx_eth()
2106 if (rq->polling) { in rx_eth()
2108 napi_gro_receive(&qs->napi, skb); in rx_eth()
2110 if (unlikely(pi->iscsic.flags)) in rx_eth()
2124 * lro_add_page - add a page chunk to an LRO session
2127 * @fl: the free list containing the page chunk to add
2131 * Add a received packet contained in a page chunk to an existing LRO
2137 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; in lro_add_page()
2138 struct port_info *pi = netdev_priv(qs->netdev); in lro_add_page()
2145 if (!qs->nomem) { in lro_add_page()
2146 skb = napi_get_frags(&qs->napi); in lro_add_page()
2147 qs->nomem = !skb; in lro_add_page()
2150 fl->credits--; in lro_add_page()
2152 dma_sync_single_for_cpu(&adap->pdev->dev, in lro_add_page()
2154 fl->buf_size - SGE_PG_RSVD, DMA_FROM_DEVICE); in lro_add_page()
2156 (*sd->pg_chunk.p_cnt)--; in lro_add_page()
2157 if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page) in lro_add_page()
2158 dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping, in lro_add_page()
2159 fl->alloc_size, DMA_FROM_DEVICE); in lro_add_page()
2162 put_page(sd->pg_chunk.page); in lro_add_page()
2164 qs->nomem = 0; in lro_add_page()
2168 rx_frag = skb_shinfo(skb)->frags; in lro_add_page()
2169 nr_frags = skb_shinfo(skb)->nr_frags; in lro_add_page()
2173 cpl = qs->lro_va = sd->pg_chunk.va + 2; in lro_add_page()
2175 if ((qs->netdev->features & NETIF_F_RXCSUM) && in lro_add_page()
2176 cpl->csum_valid && cpl->csum == htons(0xffff)) { in lro_add_page()
2177 skb->ip_summed = CHECKSUM_UNNECESSARY; in lro_add_page()
2178 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; in lro_add_page()
2180 skb->ip_summed = CHECKSUM_NONE; in lro_add_page()
2182 cpl = qs->lro_va; in lro_add_page()
2184 len -= offset; in lro_add_page()
2187 skb_frag_fill_page_desc(rx_frag, sd->pg_chunk.page, in lro_add_page()
2188 sd->pg_chunk.offset + offset, len); in lro_add_page()
2190 skb->len += len; in lro_add_page()
2191 skb->data_len += len; in lro_add_page()
2192 skb->truesize += len; in lro_add_page()
2193 skb_shinfo(skb)->nr_frags++; in lro_add_page()
2198 skb_record_rx_queue(skb, qs - &adap->sge.qs[pi->first_qset]); in lro_add_page()
2200 if (cpl->vlan_valid) { in lro_add_page()
2201 qs->port_stats[SGE_PSTAT_VLANEX]++; in lro_add_page()
2202 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan)); in lro_add_page()
2204 napi_gro_frags(&qs->napi); in lro_add_page()
2208 * handle_rsp_cntrl_info - handles control information in a response
2209 * @qs: the queue set corresponding to the response
2213 * indications and completion credits for the queue set's Tx queues.
2222 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); in handle_rsp_cntrl_info()
2227 qs->txq[TXQ_ETH].processed += credits; in handle_rsp_cntrl_info()
2231 qs->txq[TXQ_CTRL].processed += credits; in handle_rsp_cntrl_info()
2235 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); in handle_rsp_cntrl_info()
2239 qs->txq[TXQ_OFLD].processed += credits; in handle_rsp_cntrl_info()
2243 * check_ring_db - check if we need to ring any doorbells
2245 * @qs: the queue set whose Tx queues are to be examined
2246 * @sleeping: indicates which Tx queue sent GTS
2248 * Checks if some of a queue set's Tx queues need to ring their doorbells
2249 * to resume transmission after idling while they still have unprocessed
2256 struct sge_txq *txq = &qs->txq[TXQ_ETH]; in check_ring_db()
2258 if (txq->cleaned + txq->in_use != txq->processed && in check_ring_db()
2259 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) { in check_ring_db()
2260 set_bit(TXQ_RUNNING, &txq->flags); in check_ring_db()
2262 V_EGRCNTX(txq->cntxt_id)); in check_ring_db()
2267 struct sge_txq *txq = &qs->txq[TXQ_OFLD]; in check_ring_db()
2269 if (txq->cleaned + txq->in_use != txq->processed && in check_ring_db()
2270 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) { in check_ring_db()
2271 set_bit(TXQ_RUNNING, &txq->flags); in check_ring_db()
2273 V_EGRCNTX(txq->cntxt_id)); in check_ring_db()
2279 * is_new_response - check if a response is newly written
2289 return (r->intr_gen & F_RSPD_GEN2) == q->gen; in is_new_response()
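/*
 * Explanatory note (added): the response ring relies on a generation bit to
 * distinguish new entries from stale ones.  The SGE writes the current
 * generation into each response, and the driver flips q->gen whenever its
 * consumer index wraps (the q->gen ^= 1 statements elsewhere), so a response
 * is new exactly when its generation matches q->gen.
 */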
2294 q->pg_skb = NULL; in clear_rspq_bufstate()
2295 q->rx_recycle_buf = 0; in clear_rspq_bufstate()
2304 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2308 * process_responses - process responses from an SGE response queue
2310 * @qs: the queue set to which the response queue belongs
2313 * Process responses from an SGE response queue up to the supplied budget.
2315 * for the queues that belong to the response queue's queue set.
2319  *	on this queue.  If the system is short of memory, use a fairly
2320 * long delay to help recovery.
2325 struct sge_rspq *q = &qs->rspq; in process_responses()
2326 struct rsp_desc *r = &q->desc[q->cidx]; in process_responses()
2332 q->next_holdoff = q->holdoff_tmr; in process_responses()
2336 int lro = !!(qs->netdev->features & NETIF_F_GRO); in process_responses()
2342 eth = r->rss_hdr.opcode == CPL_RX_PKT; in process_responses()
2344 rss_lo = r->rss_hdr.rss_hash_val; in process_responses()
2345 flags = ntohl(r->flags); in process_responses()
2353 skb->data[0] = CPL_ASYNC_NOTIF; in process_responses()
2355 q->async_notif++; in process_responses()
2360 q->next_holdoff = NOMEM_INTR_DELAY; in process_responses()
2361 q->nomem++; in process_responses()
2363 budget_left--; in process_responses()
2366 q->imm_data++; in process_responses()
2368 } else if ((len = ntohl(r->len_cq)) != 0) { in process_responses()
2373 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; in process_responses()
2374 if (fl->use_pages) { in process_responses()
2375 void *addr = fl->sdesc[fl->cidx].pg_chunk.va; in process_responses()
2390 q->pg_skb = skb; in process_responses()
2397 q->rx_drops++; in process_responses()
2398 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT)) in process_responses()
2401 if (++fl->cidx == fl->size) in process_responses()
2402 fl->cidx = 0; in process_responses()
2404 q->pure_rsps++; in process_responses()
2412 if (unlikely(++q->cidx == q->size)) { in process_responses()
2413 q->cidx = 0; in process_responses()
2414 q->gen ^= 1; in process_responses()
2415 r = q->desc; in process_responses()
2419 if (++q->credits >= (q->size / 4)) { in process_responses()
2420 refill_rspq(adap, q, q->credits); in process_responses()
2421 q->credits = 0; in process_responses()
2432 q->offload_pkts++; in process_responses()
2434 skb->csum = rss_hi; in process_responses()
2435 skb->priority = rss_lo; in process_responses()
2436 ngathered = rx_offload(&adap->tdev, q, skb, in process_responses()
2444 --budget_left; in process_responses()
2447 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered); in process_responses()
2452 smp_mb(); /* commit Tx queue .processed updates */ in process_responses()
2453 if (unlikely(qs->txq_stopped != 0)) in process_responses()
2456 budget -= budget_left; in process_responses()
2462 __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID); in is_pure_response()
2464 return (n | r->len_cq) == 0; in is_pure_response()
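/*
 * Explanatory note (added): a "pure" response carries no packet data at all -
 * no async notification, no immediate data and a zero length/CQ field - so it
 * only conveys credit returns and status for the queue set's Tx queues.
 */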
2468 * napi_rx_handler - the NAPI handler for Rx processing
2477 struct adapter *adap = qs->adap; in napi_rx_handler()
2488 * causing the NAPI interrupt handler below to return in napi_rx_handler()
2489 * unhandled status to the OS. To protect against in napi_rx_handler()
2495 * The race cannot happen at all with MSI-X. in napi_rx_handler()
2497 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | in napi_rx_handler()
2498 V_NEWTIMER(qs->rspq.next_holdoff) | in napi_rx_handler()
2499 V_NEWINDEX(qs->rspq.cidx)); in napi_rx_handler()
2505 * process_pure_responses - process pure responses from a response queue
2508 * @r: the first pure response to process
2511  *	non data-carrying) responses.  Such responses are too lightweight to
2513 * the interrupt handler. The function is called with a pointer to a
2516 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2521 struct sge_rspq *q = &qs->rspq; in process_pure_responses()
2525 u32 flags = ntohl(r->flags); in process_pure_responses()
2528 if (unlikely(++q->cidx == q->size)) { in process_pure_responses()
2529 q->cidx = 0; in process_pure_responses()
2530 q->gen ^= 1; in process_pure_responses()
2531 r = q->desc; in process_pure_responses()
2540 q->pure_rsps++; in process_pure_responses()
2541 if (++q->credits >= (q->size / 4)) { in process_pure_responses()
2542 refill_rspq(adap, q, q->credits); in process_pure_responses()
2543 q->credits = 0; in process_pure_responses()
2553 smp_mb(); /* commit Tx queue .processed updates */ in process_pure_responses()
2554 if (unlikely(qs->txq_stopped != 0)) in process_pure_responses()
2561 * handle_responses - decide what to do with new responses in NAPI mode
2565 * This is used by the NAPI interrupt handlers to decide what to do with
2566 * new SGE responses. If there are no new responses it returns -1. If
2567 * there are new responses and they are pure (i.e., non-data carrying)
2578 struct rsp_desc *r = &q->desc[q->cidx]; in handle_responses()
2581 return -1; in handle_responses()
2584 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) | in handle_responses()
2585 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx)); in handle_responses()
2588 napi_schedule(&qs->napi); in handle_responses()
2593 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2599 struct adapter *adap = qs->adap; in t3_sge_intr_msix()
2600 struct sge_rspq *q = &qs->rspq; in t3_sge_intr_msix()
2602 spin_lock(&q->lock); in t3_sge_intr_msix()
2603 if (process_responses(adap, qs, -1) == 0) in t3_sge_intr_msix()
2604 q->unhandled_irqs++; in t3_sge_intr_msix()
2605 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) | in t3_sge_intr_msix()
2606 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx)); in t3_sge_intr_msix()
2607 spin_unlock(&q->lock); in t3_sge_intr_msix()
2612 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2618 struct sge_rspq *q = &qs->rspq; in t3_sge_intr_msix_napi()
2620 spin_lock(&q->lock); in t3_sge_intr_msix_napi()
2622 if (handle_responses(qs->adap, q) < 0) in t3_sge_intr_msix_napi()
2623 q->unhandled_irqs++; in t3_sge_intr_msix_napi()
2624 spin_unlock(&q->lock); in t3_sge_intr_msix_napi()
2629 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2630 * SGE response queues as well as error and other async events as they all use
2631 * the same MSI vector. We use one SGE response queue per port in this mode
2632 * and protect all response queues with queue 0's lock.
2638 struct sge_rspq *q = &adap->sge.qs[0].rspq; in t3_intr_msi()
2640 spin_lock(&q->lock); in t3_intr_msi()
2642 if (process_responses(adap, &adap->sge.qs[0], -1)) { in t3_intr_msi()
2643 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) | in t3_intr_msi()
2644 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx)); in t3_intr_msi()
2648 if (adap->params.nports == 2 && in t3_intr_msi()
2649 process_responses(adap, &adap->sge.qs[1], -1)) { in t3_intr_msi()
2650 struct sge_rspq *q1 = &adap->sge.qs[1].rspq; in t3_intr_msi()
2652 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) | in t3_intr_msi()
2653 V_NEWTIMER(q1->next_holdoff) | in t3_intr_msi()
2654 V_NEWINDEX(q1->cidx)); in t3_intr_msi()
2659 q->unhandled_irqs++; in t3_intr_msi()
2661 spin_unlock(&q->lock); in t3_intr_msi()
2667 struct sge_rspq *q = &qs->rspq; in rspq_check_napi()
2669 return is_new_response(&q->desc[q->cidx], q) && napi_schedule(&qs->napi); in rspq_check_napi()
2673 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2674 * by NAPI polling). Handles data events from SGE response queues as well as
2675 * error and other async events as they all use the same MSI vector. We use
2677 * queues with queue 0's lock.
2683 struct sge_rspq *q = &adap->sge.qs[0].rspq; in t3_intr_msi_napi()
2685 spin_lock(&q->lock); in t3_intr_msi_napi()
2687 new_packets = rspq_check_napi(&adap->sge.qs[0]); in t3_intr_msi_napi()
2688 if (adap->params.nports == 2) in t3_intr_msi_napi()
2689 new_packets += rspq_check_napi(&adap->sge.qs[1]); in t3_intr_msi_napi()
2691 q->unhandled_irqs++; in t3_intr_msi_napi()
2693 spin_unlock(&q->lock); in t3_intr_msi_napi()
2705 work = process_responses(adap, rspq_to_qset(rq), -1); in process_responses_gts()
2706 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | in process_responses_gts()
2707 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); in process_responses_gts()
2712 * The legacy INTx interrupt handler. This needs to handle data events from
2713 * SGE response queues as well as error and other async events as they all use
2714 * the same interrupt pin. We use one SGE response queue per port in this mode
2715 * and protect all response queues with queue 0's lock.
2721 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; in t3_intr()
2722 struct sge_rspq *q1 = &adap->sge.qs[1].rspq; in t3_intr()
2724 spin_lock(&q0->lock); in t3_intr()
2726 w0 = is_new_response(&q0->desc[q0->cidx], q0); in t3_intr()
2727 w1 = adap->params.nports == 2 && in t3_intr()
2728 is_new_response(&q1->desc[q1->cidx], q1); in t3_intr()
2744 spin_unlock(&q0->lock); in t3_intr()
2749 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2750 * Handles data events from SGE response queues as well as error and other
2751 * async events as they all use the same interrupt pin. We use one SGE
2752 * response queue per port in this mode and protect all response queues with
2759 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; in t3b_intr()
2767 spin_lock(&q0->lock); in t3b_intr()
2776 process_responses_gts(adap, &adap->sge.qs[1].rspq); in t3b_intr()
2778 spin_unlock(&q0->lock); in t3b_intr()
2783 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2784 * Handles data events from SGE response queues as well as error and other
2785 * async events as they all use the same interrupt pin. We use one SGE
2786 * response queue per port in this mode and protect all response queues with
2793 struct sge_qset *qs0 = &adap->sge.qs[0]; in t3b_intr_napi()
2794 struct sge_rspq *q0 = &qs0->rspq; in t3b_intr_napi()
2802 spin_lock(&q0->lock); in t3b_intr_napi()
2808 napi_schedule(&qs0->napi); in t3b_intr_napi()
2811 napi_schedule(&adap->sge.qs[1].napi); in t3b_intr_napi()
2813 spin_unlock(&q0->lock); in t3b_intr_napi()
2818 * t3_intr_handler - select the top-level interrupt handler
2820 * @polling: whether using NAPI to service response queues
2822 * Selects the top-level interrupt handler based on the type of interrupts
2823 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2824 * response queues.
2828 if (adap->flags & USING_MSIX) in t3_intr_handler()
2830 if (adap->flags & USING_MSI) in t3_intr_handler()
2832 if (adap->params.rev > 0) in t3_intr_handler()
2847 * t3_sge_err_intr_handler - SGE async event interrupt handler
2850 * Interrupt handler for SGE asynchronous (non-data) events.
2871 "packet delivered to disabled response queue " in t3_sge_err_intr_handler()
2876 queue_work(cxgb3_wq, &adapter->db_drop_task); in t3_sge_err_intr_handler()
2879 queue_work(cxgb3_wq, &adapter->db_full_task); in t3_sge_err_intr_handler()
2882 queue_work(cxgb3_wq, &adapter->db_empty_task); in t3_sge_err_intr_handler()
 * sge_timer_tx - perform periodic maintenance of an SGE qset
 * @t: a timer list containing the SGE queue set to maintain
 *
 * Runs periodically from a timer to perform maintenance of an SGE queue
 * set.
 *
 * Cleans up any completed Tx descriptors that may still be pending.
 * Normal descriptor cleanup happens when new packets are added to a Tx
 * queue, so this timer is relatively infrequent and only does cleanup
 * if the Tx queue has not seen any new packets in a while.  We make a
 * best-effort attempt to reclaim descriptors, in that we don't wait
 * around if we cannot get a queue's lock (most likely because someone
 * else is queueing new packets and will therefore also handle the clean
 * up).  Since control queues use immediate data exclusively we don't
 * bother cleaning them up here.
        struct port_info *pi = netdev_priv(qs->netdev);
        struct adapter *adap = pi->adapter;

        if (__netif_tx_trylock(qs->tx_q)) {
                tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH],
                __netif_tx_unlock(qs->tx_q);

        if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
                tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD],
                spin_unlock(&qs->txq[TXQ_OFLD].lock);

        mod_timer(&qs->tx_reclaim_timer, jiffies + next_period);
 * sge_timer_rx - perform periodic maintenance of an SGE qset
 * @t: the timer list containing the SGE queue set to maintain
 *
 * a) Replenishes Rx queues that have run out due to memory shortage.
 * Normally new Rx buffers are added when existing ones are consumed, but
 * when out of memory a queue can become empty.  We try to add only a few
 * buffers here; the queue will be replenished fully as these new buffers
 * are used up if the memory shortage has subsided.
 *
 * b) Returns coalesced response queue credits in case a response queue is
 * starved.
        struct port_info *pi = netdev_priv(qs->netdev);
        struct adapter *adap = pi->adapter;
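        /*
         * On rev 0 parts all response queues are serviced under queue 0's
         * lock, so synchronize with that; later revisions use the per-qset
         * response queue lock.
         */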
        lock = adap->params.rev > 0 ?
               &qs->rspq.lock : &adap->sge.qs[0].rspq.lock;

        if (napi_is_scheduled(&qs->napi))

        if (adap->params.rev < 4) {
                if (status & (1 << qs->rspq.cntxt_id)) {
                        qs->rspq.starved++;
                        if (qs->rspq.credits) {
                                qs->rspq.credits--;
                                refill_rspq(adap, &qs->rspq, 1);
                                qs->rspq.restarted++;
                                             1 << qs->rspq.cntxt_id);
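        /* Top up any free-buffer list that has dropped below its full size */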
        if (qs->fl[0].credits < qs->fl[0].size)
                __refill_fl(adap, &qs->fl[0]);
        if (qs->fl[1].credits < qs->fl[1].size)
                __refill_fl(adap, &qs->fl[1]);

        mod_timer(&qs->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
 * t3_update_qset_coalesce - update coalescing settings for a queue set
        qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); /* can't be 0 */
        qs->rspq.polling = p->polling;
        qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
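/*
 * Illustrative sketch only: how a control path (e.g. an ethtool set_coalesce
 * handler) might apply a new interrupt holdoff to every queue set of a port.
 * The caller-side bookkeeping (pi->first_qset, pi->nqsets,
 * adap->params.sge.qset[]) follows the driver's conventions but is an
 * assumption here, not taken from this file.
 */
#if 0
static void example_set_coalesce(struct adapter *adap, struct port_info *pi,
                                 unsigned int usecs)
{
        int i;

        for (i = 0; i < pi->nqsets; i++) {
                struct qset_params *qsp = &adap->params.sge.qset[pi->first_qset + i];
                struct sge_qset *qs = &adap->sge.qs[pi->first_qset + i];

                qsp->coalesce_usecs = usecs;
                /* holdoff_tmr becomes max(usecs * 10, 1) per the code above */
                t3_update_qset_coalesce(qs, qsp);
        }
}
#endif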
 * t3_sge_alloc_qset - initialize an SGE queue set
 * @ntxq: number of Tx queues for the queue set
 * @netdevq: net device TX queue associated with this queue set
 *
 * Allocate resources and initialize an SGE queue set.  A queue set
 * comprises a response queue, two Rx free-buffer queues, and up to 3
 * Tx queues.  The Tx queues are assigned roles in the order Ethernet
 * queue, offload queue, and control queue.
        int i, avail, ret = -ENOMEM;
        struct sge_qset *q = &adapter->sge.qs[id];

        timer_setup(&q->tx_reclaim_timer, sge_timer_tx, 0);
        timer_setup(&q->rx_reclaim_timer, sge_timer_rx, 0);

        q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
                                   &q->fl[0].phys_addr, &q->fl[0].sdesc);
        if (!q->fl[0].desc)

        q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
                                   &q->fl[1].phys_addr, &q->fl[1].sdesc);
        if (!q->fl[1].desc)

        q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
                                  &q->rspq.phys_addr, NULL);
        if (!q->rspq.desc)

                 * need to keep track of any sk_buffs.
                q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
                                            &q->txq[i].phys_addr,
                                            &q->txq[i].sdesc);
                if (!q->txq[i].desc)

                q->txq[i].gen = 1;
                q->txq[i].size = p->txq_size[i];
                spin_lock_init(&q->txq[i].lock);
                skb_queue_head_init(&q->txq[i].sendq);

        INIT_WORK(&q->txq[TXQ_OFLD].qresume_task, restart_offloadq);
        INIT_WORK(&q->txq[TXQ_CTRL].qresume_task, restart_ctrlq);
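        /*
         * The qresume work items registered above restart the offload and
         * control queues once descriptors become available again after the
         * queues had to be suspended.
         */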
        q->fl[0].gen = q->fl[1].gen = 1;
        q->fl[0].size = p->fl_size;
        q->fl[1].size = p->jumbo_size;

        q->rspq.gen = 1;
        q->rspq.size = p->rspq_size;
        spin_lock_init(&q->rspq.lock);
        skb_queue_head_init(&q->rspq.rx_queue);

        q->txq[TXQ_ETH].stop_thres = nports *

        q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
        q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);

        q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
        q->fl[1].buf_size = is_offload(adapter) ?
                (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :

        q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
        q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
        q->fl[0].order = FL0_PG_ORDER;
        q->fl[1].order = FL1_PG_ORDER;
        q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
        q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;

        spin_lock_irq(&adapter->sge.reg_lock);

        ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
                                   q->rspq.phys_addr, q->rspq.size,
                                   q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);

                ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
                                          q->fl[i].phys_addr, q->fl[i].size,
                                          q->fl[i].buf_size - SGE_PG_RSVD,
                                          p->cong_thres, 1, 0);

        ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
                                 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
                                 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,

                ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
                                         q->txq[TXQ_OFLD].phys_addr,
                                         q->txq[TXQ_OFLD].size, 0, 1, 0);

                ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
                                         q->txq[TXQ_CTRL].phys_addr,
                                         q->txq[TXQ_CTRL].size,
                                         q->txq[TXQ_CTRL].token, 1, 0);

        spin_unlock_irq(&adapter->sge.reg_lock);

        q->adap = adapter;
        q->netdev = dev;
        q->tx_q = netdevq;

        avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
                ret = -ENOMEM;
        if (avail < q->fl[0].size)

        avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
        if (avail < q->fl[1].size)

        refill_rspq(adapter, &q->rspq, q->rspq.size - 1);

        t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
                     V_NEWTIMER(q->rspq.holdoff_tmr));

        spin_unlock_irq(&adapter->sge.reg_lock);
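/*
 * Illustrative sketch only: how a setup path might allocate one queue set
 * per port queue with this function.  The per-port bookkeeping
 * (pi->first_qset, pi->nqsets) and the choice of irq_vec_idx are assumptions
 * modeled on the driver's conventions, not taken from this file.
 */
#if 0
static int example_setup_qsets(struct adapter *adap, struct net_device *dev,
                               struct port_info *pi)
{
        int i, err;

        for (i = 0; i < pi->nqsets; i++) {
                int qid = pi->first_qset + i;

                err = t3_sge_alloc_qset(adap, qid, adap->params.nports, qid,
                                        &adap->params.sge.qset[qid],
                                        SGE_TXQ_PER_SET, dev,
                                        netdev_get_tx_queue(dev, i));
                if (err)
                        return err;
        }
        return 0;
}
#endif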
 * t3_start_sge_timers - start SGE timer callbacks
                struct sge_qset *q = &adap->sge.qs[i];

                if (q->tx_reclaim_timer.function)
                        mod_timer(&q->tx_reclaim_timer,
                if (q->rx_reclaim_timer.function)
                        mod_timer(&q->rx_reclaim_timer,
 * t3_stop_sge_timers - stop SGE timer callbacks
                struct sge_qset *q = &adap->sge.qs[i];

                if (q->tx_reclaim_timer.function)
                        del_timer_sync(&q->tx_reclaim_timer);
                if (q->rx_reclaim_timer.function)
                        del_timer_sync(&q->rx_reclaim_timer);
 * t3_free_sge_resources - free SGE resources
                t3_free_qset(adap, &adap->sge.qs[i]);
 * t3_sge_start - enable SGE
 * t3_sge_stop_dma - Disable SGE DMA engine operation
 * t3_sge_stop - disable SGE operation completely
        if (!(adap->flags & FULL_INIT_DONE))
                struct sge_qset *qs = &adap->sge.qs[i];

                cancel_work_sync(&qs->txq[TXQ_OFLD].qresume_task);
                cancel_work_sync(&qs->txq[TXQ_CTRL].qresume_task);
 * t3_sge_init - initialize SGE
 *
 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queue sets here; instead the driver
 * top-level must request those individually.  We also do not enable DMA
 * here; that should be done after the queues have been set up.
        unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);

               V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
               V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;

        if (adap->params.rev > 0) {
                if (!(adap->flags & (USING_MSIX | USING_MSI)))
                     adap->params.rev < T3_REV_C ? 1000 : 500);
 * t3_sge_prep - one-time SGE initialization
 *
 * Performs one-time initialization of SGE SW state.  Includes determining
 * defaults for the various SGE parameters, which admins can change until
 * they are used to initialize the SGE.
        p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -

                struct qset_params *q = p->qset + i;

                q->polling = adap->params.rev > 0;
                q->coalesce_usecs = 5;
                q->rspq_size = 1024;
                q->fl_size = 1024;
                q->jumbo_size = 512;
                q->txq_size[TXQ_ETH] = 1024;
                q->txq_size[TXQ_OFLD] = 1024;
                q->txq_size[TXQ_CTRL] = 256;
                q->cong_thres = 0;

        spin_lock_init(&adap->sge.reg_lock);
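/*
 * Illustrative note (an informed assumption; the actual call sites live in
 * the top-level driver outside this file): the SGE entry points above are
 * roughly used in this order over the driver's lifetime:
 *
 *   setup:    t3_sge_prep() -> t3_sge_init() -> t3_sge_alloc_qset() for each
 *             queue set -> t3_start_sge_timers() -> t3_sge_start()
 *   teardown: t3_sge_stop_dma() / t3_sge_stop() -> t3_stop_sge_timers() ->
 *             t3_free_sge_resources()
 */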