/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_NET_QUEUES_H
#define _LINUX_NET_QUEUES_H

#include <linux/netdevice.h>

/* See the netdev.yaml spec for definition of each statistic */
struct netdev_queue_stats_rx {
	u64 bytes;
	u64 packets;
	u64 alloc_fail;

	u64 hw_drops;
	u64 hw_drop_overruns;

	u64 csum_unnecessary;
	u64 csum_none;
	u64 csum_bad;

	u64 hw_gro_packets;
	u64 hw_gro_bytes;
	u64 hw_gro_wire_packets;
	u64 hw_gro_wire_bytes;

	u64 hw_drop_ratelimits;
};

struct netdev_queue_stats_tx {
	u64 bytes;
	u64 packets;

	u64 hw_drops;
	u64 hw_drop_errors;

	u64 csum_none;
	u64 needs_csum;

	u64 hw_gso_packets;
	u64 hw_gso_bytes;
	u64 hw_gso_wire_packets;
	u64 hw_gso_wire_bytes;

	u64 hw_drop_ratelimits;

	u64 stop;
	u64 wake;
};

/**
 * struct netdev_stat_ops - netdev ops for fine grained stats
 * @get_queue_stats_rx: get stats for a given Rx queue
 * @get_queue_stats_tx: get stats for a given Tx queue
 * @get_base_stats: get base stats (not belonging to any live instance)
 *
 * Query stats for a given object. The values of the statistics are undefined
 * on entry (specifically they are *not* zero-initialized). Drivers should
 * assign values only to the statistics they collect. Statistics which are not
 * collected must be left undefined.
 *
 * Queue objects are not necessarily persistent, and only currently active
 * queues are queried by the per-queue callbacks. This means that per-queue
 * statistics will not generally add up to the total number of events for
 * the device. The @get_base_stats callback allows filling in the delta
 * between events for currently live queues and overall device history.
 * @get_base_stats can also be used to report any miscellaneous packets
 * transferred outside of the main set of queues used by the networking stack.
 * When the statistics for the entire device are queried, first @get_base_stats
 * is issued to collect the delta, and then a series of per-queue callbacks.
 * Only statistics which are set in @get_base_stats will be reported
 * at the device level, meaning that unlike in queue callbacks, setting
 * a statistic to zero in @get_base_stats is a legitimate thing to do.
 * This is because @get_base_stats has a second function of designating which
 * statistics are in fact correct for the entire device (e.g. when history
 * for some of the events is not maintained, and reliable "total" cannot
 * be provided).
 *
 * Device drivers can assume that when collecting total device stats,
 * the @get_base_stats and subsequent per-queue calls are performed
 * "atomically" (without releasing the rtnl_lock).
 *
 * Device drivers are encouraged to reset the per-queue statistics when
 * the number of queues changes. This is because the primary use case for
 * per-queue statistics is currently to detect traffic imbalance.
 */
struct netdev_stat_ops {
	void (*get_queue_stats_rx)(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *stats);
	void (*get_queue_stats_tx)(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *stats);
	void (*get_base_stats)(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx);
};
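
/*
 * Illustrative sketch (not taken from any in-tree driver): a hypothetical
 * "foo" driver might wire up the ops roughly as below. All foo_* identifiers
 * are made up, and the Tx per-queue callback is omitted for brevity.
 *
 *	static void foo_get_queue_stats_rx(struct net_device *dev, int idx,
 *					   struct netdev_queue_stats_rx *rx)
 *	{
 *		struct foo_priv *priv = netdev_priv(dev);
 *		struct foo_rx_ring *ring = &priv->rx_rings[idx];
 *
 *		// Assign only the statistics this driver collects;
 *		// everything else must stay untouched (undefined).
 *		rx->packets = ring->pkts;
 *		rx->bytes = ring->bytes;
 *		rx->alloc_fail = ring->alloc_fail;
 *	}
 *
 *	static void foo_get_base_stats(struct net_device *dev,
 *				       struct netdev_queue_stats_rx *rx,
 *				       struct netdev_queue_stats_tx *tx)
 *	{
 *		struct foo_priv *priv = netdev_priv(dev);
 *
 *		// History from queues torn down earlier; writing zero is
 *		// legitimate here and marks the statistic as reportable
 *		// for the whole device.
 *		rx->packets = priv->old_rx_pkts;
 *		rx->bytes = priv->old_rx_bytes;
 *		rx->alloc_fail = priv->old_rx_alloc_fail;
 *		tx->packets = priv->old_tx_pkts;
 *		tx->bytes = priv->old_tx_bytes;
 *	}
 *
 *	static const struct netdev_stat_ops foo_stat_ops = {
 *		.get_queue_stats_rx = foo_get_queue_stats_rx,
 *		.get_base_stats = foo_get_base_stats,
 *	};
 */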

/**
 * struct netdev_queue_mgmt_ops - netdev ops for queue management
 *
 * @ndo_queue_mem_size: Size of the struct that describes a queue's memory.
 *
 * @ndo_queue_mem_alloc: Allocate memory for an RX queue at the specified index.
 *			 The new memory is written at the specified address.
 *
 * @ndo_queue_mem_free: Free memory from an RX queue.
 *
 * @ndo_queue_start: Start an RX queue with the specified memory and at the
 *		     specified index.
 *
 * @ndo_queue_stop: Stop the RX queue at the specified index. The stopped
 *		    queue's memory is written at the specified address.
 */
struct netdev_queue_mgmt_ops {
	size_t	ndo_queue_mem_size;
	int	(*ndo_queue_mem_alloc)(struct net_device *dev,
				       void *per_queue_mem,
				       int idx);
	void	(*ndo_queue_mem_free)(struct net_device *dev,
				      void *per_queue_mem);
	int	(*ndo_queue_start)(struct net_device *dev,
				   void *per_queue_mem,
				   int idx);
	int	(*ndo_queue_stop)(struct net_device *dev,
				  void *per_queue_mem,
				  int idx);
};
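
/*
 * Illustrative restart flow: a simplified sketch of how a caller might use
 * these ops to swap the memory backing RX queue @idx. Error handling and
 * locking are omitted, and "qops", "new_mem" and "old_mem" are placeholder
 * names used only for this example.
 *
 *	new_mem = kzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
 *	old_mem = kzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
 *
 *	err = qops->ndo_queue_mem_alloc(dev, new_mem, idx);	// prepare new memory
 *	err = qops->ndo_queue_stop(dev, old_mem, idx);		// old memory returned here
 *	err = qops->ndo_queue_start(dev, new_mem, idx);		// queue now runs on new memory
 *	qops->ndo_queue_mem_free(dev, old_mem);			// release the old memory
 */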

/**
 * DOC: Lockless queue stopping / waking helpers.
 *
 * The netif_txq_maybe_stop() and __netif_txq_completed_wake()
 * macros are designed to safely implement stopping
 * and waking netdev queues without full lock protection.
 *
 * We assume that there can be no concurrent stop attempts and no concurrent
 * wake attempts. The try-stop should happen from the xmit handler,
 * while wake up should be triggered from NAPI poll context.
 * The two may run concurrently (single producer, single consumer).
 *
 * The try-stop side is expected to run from the xmit handler and therefore
 * it does not reschedule Tx (netif_tx_start_queue() instead of
 * netif_tx_wake_queue()). Uses of the ``stop`` macros outside of the xmit
 * handler may lead to the xmit queue being enabled but not run.
 * The waking side does not have similar context restrictions.
 *
 * The macros guarantee that rings will not remain stopped if there's
 * space available, but they do *not* prevent false wake-ups when
 * the ring is full! Drivers should check for ring full at the start
 * of the xmit handler.
 *
 * All descriptor ring indexes (and other relevant shared state) must
 * be updated before invoking the macros.
 */
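
/*
 * The get_desc arguments of the macros below are typically an expression like
 * the one sketched here: a hypothetical ring with free-running producer and
 * consumer indexes and a power-of-two size (the foo_* names are made up for
 * illustration). It must recompute the free count every time it is evaluated.
 *
 *	static unsigned int foo_tx_ring_free(const struct foo_tx_ring *ring)
 *	{
 *		// One slot is kept unused to tell "full" apart from "empty".
 *		return ring->size - 1 - (ring->prod - READ_ONCE(ring->cons));
 *	}
 */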

#define netif_txq_try_stop(txq, get_desc, start_thrs) \
	({ \
		int _res; \
	\
		netif_tx_stop_queue(txq); \
		/* Producer index and stop bit must be visible \
		 * to consumer before we recheck. \
		 * Pairs with a barrier in __netif_txq_completed_wake(). \
		 */ \
		smp_mb__after_atomic(); \
	\
		/* We need to check again in case another \
		 * CPU has just made room available. \
		 */ \
		_res = 0; \
		if (unlikely(get_desc >= start_thrs)) { \
			netif_tx_start_queue(txq); \
			_res = -1; \
		} \
		_res; \
	})

/**
 * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed
 * @txq: struct netdev_queue to stop/start
 * @get_desc: get current number of free descriptors (see requirements below!)
 * @stop_thrs: minimal number of available descriptors for queue to be left
 *	       enabled
 * @start_thrs: minimal number of descriptors to re-enable the queue, can be
 *		equal to @stop_thrs or higher to avoid frequent waking
 *
 * All arguments may be evaluated multiple times; beware of side effects.
 * @get_desc must be a formula or a function call; it must always
 * return up-to-date information when evaluated!
 * Expected to be used from ndo_start_xmit, see the comment on top of the file.
 *
 * Returns:
 *	 0 if the queue was stopped
 *	 1 if the queue was left enabled
 *	-1 if the queue was re-enabled (raced with waking)
 */
#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs) \
	({ \
		int _res; \
	\
		_res = 1; \
		if (unlikely(get_desc < stop_thrs)) \
			_res = netif_txq_try_stop(txq, get_desc, start_thrs); \
		_res; \
	})
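
/*
 * Sketch of the intended xmit-side usage. All foo_* identifiers are
 * hypothetical (foo_tx_ring_free() is the sketch above), and MAX_SKB_FRAGS + 2
 * is only an example threshold. Note the explicit ring-full check at the top,
 * and that the producer index is updated before the macro is invoked, as
 * required by the DOC section above:
 *
 *	static netdev_tx_t foo_start_xmit(struct sk_buff *skb,
 *					  struct net_device *dev)
 *	{
 *		struct foo_tx_ring *ring = foo_skb_to_ring(dev, skb);
 *
 *		if (unlikely(foo_tx_ring_free(ring) < MAX_SKB_FRAGS + 2)) {
 *			// False wake-ups are possible, so a full ring
 *			// must be handled here.
 *			netif_tx_stop_queue(ring->txq);
 *			return NETDEV_TX_BUSY;
 *		}
 *
 *		foo_post_descriptors(ring, skb);	// updates ring->prod
 *
 *		netif_txq_maybe_stop(ring->txq, foo_tx_ring_free(ring),
 *				     MAX_SKB_FRAGS + 2, MAX_SKB_FRAGS + 2);
 *
 *		return NETDEV_TX_OK;
 *	}
 */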

/* Variant of netdev_tx_completed_queue() which guarantees smp_mb() if
 * @bytes != 0, regardless of kernel config.
 */
static inline void
netdev_txq_completed_mb(struct netdev_queue *dev_queue,
			unsigned int pkts, unsigned int bytes)
{
	if (IS_ENABLED(CONFIG_BQL))
		netdev_tx_completed_queue(dev_queue, pkts, bytes);
	else if (bytes)
		smp_mb();
}

/**
 * __netif_txq_completed_wake() - locklessly wake a Tx queue, if needed
 * @txq: struct netdev_queue to stop/start
 * @pkts: number of packets completed
 * @bytes: number of bytes completed
 * @get_desc: get current number of free descriptors (see requirements below!)
 * @start_thrs: minimal number of descriptors to re-enable the queue
 * @down_cond: down condition, predicate indicating that the queue should
 *	       not be woken up even if descriptors are available
 *
 * All arguments may be evaluated multiple times.
 * @get_desc must be a formula or a function call; it must always
 * return up-to-date information when evaluated!
 * Reports completed pkts/bytes to BQL.
 *
 * Returns:
 *	 0 if the queue was woken up
 *	 1 if the queue was already enabled (or disabled but @down_cond is true)
 *	-1 if the queue was left unchanged (@start_thrs not reached)
 */
#define __netif_txq_completed_wake(txq, pkts, bytes, \
				   get_desc, start_thrs, down_cond) \
	({ \
		int _res; \
	\
		/* Report to BQL and piggy back on its barrier. \
		 * Barrier makes sure that anybody stopping the queue \
		 * after this point sees the new consumer index. \
		 * Pairs with barrier in netif_txq_try_stop(). \
		 */ \
		netdev_txq_completed_mb(txq, pkts, bytes); \
	\
		_res = -1; \
		if (pkts && likely(get_desc >= start_thrs)) { \
			_res = 1; \
			if (unlikely(netif_tx_queue_stopped(txq)) && \
			    !(down_cond)) { \
				netif_tx_wake_queue(txq); \
				_res = 0; \
			} \
		} \
		_res; \
	})

#define netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs) \
	__netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs, false)
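
/*
 * Sketch of the completion (waking) side, typically run from NAPI poll
 * (foo_* identifiers are hypothetical and match the xmit sketch above).
 * The consumer index is updated before the macro runs, so the BQL/smp_mb()
 * barrier inside it orders that update against a racing try-stop:
 *
 *	static void foo_clean_tx(struct foo_tx_ring *ring, int budget)
 *	{
 *		unsigned int pkts = 0, bytes = 0;
 *
 *		foo_reclaim_descriptors(ring, budget, &pkts, &bytes);	// updates ring->cons
 *
 *		netif_txq_completed_wake(ring->txq, pkts, bytes,
 *					 foo_tx_ring_free(ring),
 *					 MAX_SKB_FRAGS + 2);
 *	}
 */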

/* subqueue variants follow */

#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs) \
	({ \
		struct netdev_queue *txq; \
	\
		txq = netdev_get_tx_queue(dev, idx); \
		netif_txq_try_stop(txq, get_desc, start_thrs); \
	})

#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
	({ \
		struct netdev_queue *txq; \
	\
		txq = netdev_get_tx_queue(dev, idx); \
		netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs); \
	})

#define netif_subqueue_completed_wake(dev, idx, pkts, bytes, \
				      get_desc, start_thrs) \
	({ \
		struct netdev_queue *txq; \
	\
		txq = netdev_get_tx_queue(dev, idx); \
		netif_txq_completed_wake(txq, pkts, bytes, \
					 get_desc, start_thrs); \
	})
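
/*
 * The subqueue variants are for drivers which track queues by index rather
 * than by struct netdev_queue pointer, e.g. (hypothetical names again):
 *
 *	netif_subqueue_maybe_stop(dev, ring->q_idx, foo_tx_ring_free(ring),
 *				  MAX_SKB_FRAGS + 2, MAX_SKB_FRAGS + 2);
 */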

#endif