/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_NET_QUEUES_H
#define _LINUX_NET_QUEUES_H

#include <linux/netdevice.h>

/**
 * struct netdev_config - queue-related configuration for a netdev
 * @hds_thresh:		header-data split (HDS) threshold value
 * @hds_config:		HDS setting requested by userspace (via ethtool)
 */
struct netdev_config {
	u32	hds_thresh;
	u8	hds_config;
};

/* See the netdev.yaml spec for definition of each statistic */
struct netdev_queue_stats_rx {
	u64 bytes;
	u64 packets;
	u64 alloc_fail;

	u64 hw_drops;
	u64 hw_drop_overruns;

	u64 csum_complete;
	u64 csum_unnecessary;
	u64 csum_none;
	u64 csum_bad;

	u64 hw_gro_packets;
	u64 hw_gro_bytes;
	u64 hw_gro_wire_packets;
	u64 hw_gro_wire_bytes;

	u64 hw_drop_ratelimits;
};

struct netdev_queue_stats_tx {
	u64 bytes;
	u64 packets;

	u64 hw_drops;
	u64 hw_drop_errors;

	u64 csum_none;
	u64 needs_csum;

	u64 hw_gso_packets;
	u64 hw_gso_bytes;
	u64 hw_gso_wire_packets;
	u64 hw_gso_wire_bytes;

	u64 hw_drop_ratelimits;

	u64 stop;
	u64 wake;
};

/**
 * struct netdev_stat_ops - netdev ops for fine-grained stats
 * @get_queue_stats_rx:	get stats for a given Rx queue
 * @get_queue_stats_tx:	get stats for a given Tx queue
 * @get_base_stats:	get base stats (not belonging to any live instance)
 *
 * Query stats for a given object. The values of the statistics are undefined
 * on entry (specifically they are *not* zero-initialized). Drivers should
 * assign values only to the statistics they collect. Statistics which are not
 * collected must be left undefined.
 *
 * Queue objects are not necessarily persistent, and only currently active
 * queues are queried by the per-queue callbacks. This means that per-queue
 * statistics will not generally add up to the total number of events for
 * the device. The @get_base_stats callback allows filling in the delta
 * between events for currently live queues and overall device history.
 * @get_base_stats can also be used to report any miscellaneous packets
 * transferred outside of the main set of queues used by the networking stack.
 * When the statistics for the entire device are queried, first @get_base_stats
 * is issued to collect the delta, and then a series of per-queue callbacks.
 * Only statistics which are set in @get_base_stats will be reported
 * at the device level, meaning that, unlike in queue callbacks, setting
 * a statistic to zero in @get_base_stats is a legitimate thing to do.
 * This is because @get_base_stats has a second function of designating which
 * statistics are in fact correct for the entire device (e.g. when history
 * for some of the events is not maintained, and a reliable "total" cannot
 * be provided).
 *
 * Ops are called under the instance lock if netdev_need_ops_lock()
 * returns true, otherwise under rtnl_lock.
 * Device drivers can assume that when collecting total device stats,
 * the @get_base_stats and subsequent per-queue calls are performed
 * "atomically" (without releasing the relevant lock).
 *
 * Device drivers are encouraged to reset the per-queue statistics when
 * the number of queues changes. This is because the primary use case for
 * per-queue statistics is currently to detect traffic imbalance.
 */
struct netdev_stat_ops {
	void (*get_queue_stats_rx)(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *stats);
	void (*get_queue_stats_tx)(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *stats);
	void (*get_base_stats)(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx);
};

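/*
 * Example usage (an illustrative sketch, not a reference implementation;
 * all "myqdrv_*" names are hypothetical):
 *
 *	static void myqdrv_get_queue_stats_rx(struct net_device *dev, int idx,
 *					      struct netdev_queue_stats_rx *stats)
 *	{
 *		struct myqdrv_priv *priv = netdev_priv(dev);
 *		struct myqdrv_rx_ring *ring = &priv->rx_ring[idx];
 *
 *		// Assign only the counters this driver maintains; the rest
 *		// must be left untouched, they are not zero-initialized.
 *		stats->packets = ring->stats.packets;
 *		stats->bytes = ring->stats.bytes;
 *		stats->alloc_fail = ring->stats.alloc_fail;
 *	}
 *
 *	static void myqdrv_get_base_stats(struct net_device *dev,
 *					  struct netdev_queue_stats_rx *rx,
 *					  struct netdev_queue_stats_tx *tx)
 *	{
 *		// Writing zero here is meaningful: it designates these
 *		// statistics as valid for the entire device, even though
 *		// no events happened outside the live queues.
 *		rx->packets = 0;
 *		rx->bytes = 0;
 *		rx->alloc_fail = 0;
 *		tx->packets = 0;
 *		tx->bytes = 0;
 *	}
 *
 * The driver would then point dev->stat_ops at a struct netdev_stat_ops
 * containing these callbacks.
 */
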
void netdev_stat_queue_sum(struct net_device *netdev,
			   int rx_start, int rx_end,
			   struct netdev_queue_stats_rx *rx_sum,
			   int tx_start, int tx_end,
			   struct netdev_queue_stats_tx *tx_sum);

/**
 * struct netdev_queue_mgmt_ops - netdev ops for queue management
 *
 * @ndo_queue_mem_size: Size of the struct that describes a queue's memory.
 *
 * @ndo_queue_mem_alloc: Allocate memory for an RX queue at the specified index.
 *			 The new memory is written at the specified address.
 *
 * @ndo_queue_mem_free:	Free memory from an RX queue.
 *
 * @ndo_queue_start:	Start an RX queue with the specified memory and at the
 *			specified index.
 *
 * @ndo_queue_stop:	Stop the RX queue at the specified index. The stopped
 *			queue's memory is written at the specified address.
 *
 * @ndo_queue_get_dma_dev: Get the DMA device to use for zero-copy operations
 *			   on this queue. Return NULL on error.
 *
 * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
 * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
 * be called for an interface which is open.
 */
struct netdev_queue_mgmt_ops {
	size_t			ndo_queue_mem_size;
	int			(*ndo_queue_mem_alloc)(struct net_device *dev,
						       void *per_queue_mem,
						       int idx);
	void			(*ndo_queue_mem_free)(struct net_device *dev,
						      void *per_queue_mem);
	int			(*ndo_queue_start)(struct net_device *dev,
						   void *per_queue_mem,
						   int idx);
	int			(*ndo_queue_stop)(struct net_device *dev,
						  void *per_queue_mem,
						  int idx);
	struct device *		(*ndo_queue_get_dma_dev)(struct net_device *dev,
							 int idx);
};

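/*
 * Sketch of how the core restarts an RX queue with these ops (heavily
 * simplified: error handling, rollback and locking are omitted):
 *
 *	const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops;
 *	void *new_mem, *old_mem;
 *	int err;
 *
 *	new_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
 *	old_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
 *
 *	// May be called even while the interface is closed.
 *	err = qops->ndo_queue_mem_alloc(dev, new_mem, idx);
 *
 *	// Start/stop only touch a running interface.
 *	if (netif_running(dev)) {
 *		err = qops->ndo_queue_stop(dev, old_mem, idx);
 *		err = qops->ndo_queue_start(dev, new_mem, idx);
 *		qops->ndo_queue_mem_free(dev, old_mem);
 *	}
 *
 *	kvfree(old_mem);
 *	kvfree(new_mem);
 */
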
bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx);

/**
 * DOC: Lockless queue stopping / waking helpers.
 *
 * The netif_txq_maybe_stop() and __netif_txq_completed_wake()
 * macros are designed to safely implement stopping
 * and waking netdev queues without full lock protection.
 *
 * We assume that there can be no concurrent stop attempts and no concurrent
 * wake attempts. The try-stop should happen from the xmit handler,
 * while wake-up should be triggered from NAPI poll context.
 * The two may run concurrently (single producer, single consumer).
 *
 * The try-stop side is expected to run from the xmit handler and therefore
 * it does not reschedule Tx (netif_tx_start_queue() instead of
 * netif_tx_wake_queue()). Uses of the ``stop`` macros outside of the xmit
 * handler may lead to the xmit queue being enabled but never run.
 * The waking side does not have similar context restrictions.
 *
 * The macros guarantee that rings will not remain stopped if there's
 * space available, but they do *not* prevent false wake-ups when
 * the ring is full! Drivers should check for ring full at the start
 * of the xmit handler.
 *
 * All descriptor ring indexes (and other relevant shared state) must
 * be updated before invoking the macros.
 */

#define netif_txq_try_stop(txq, get_desc, start_thrs)			\
	({								\
		int _res;						\
									\
		netif_tx_stop_queue(txq);				\
		/* Producer index and stop bit must be visible		\
		 * to consumer before we recheck.			\
		 * Pairs with a barrier in __netif_txq_completed_wake(). \
		 */							\
		smp_mb__after_atomic();					\
									\
		/* We need to check again in case another		\
		 * CPU has just made room available.			\
		 */							\
		_res = 0;						\
		if (unlikely(get_desc >= start_thrs)) {			\
			netif_tx_start_queue(txq);			\
			_res = -1;					\
		}							\
		_res;							\
	})

/**
 * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed
 * @txq:	struct netdev_queue to stop/start
 * @get_desc:	get current number of free descriptors (see requirements below!)
 * @stop_thrs:	minimal number of available descriptors for queue to be left
 *		enabled
 * @start_thrs:	minimal number of descriptors to re-enable the queue, can be
 *		equal to @stop_thrs or higher to avoid frequent waking
 *
 * All arguments may be evaluated multiple times, beware of side effects.
 * @get_desc must be a formula or a function call, it must always
 * return up-to-date information when evaluated!
 * Expected to be used from ndo_start_xmit, see the DOC comment above.
 *
 * Returns:
 *	 0 if the queue was stopped
 *	 1 if the queue was left enabled
 *	-1 if the queue was re-enabled (raced with waking)
 */
#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs)	\
	({								\
		int _res;						\
									\
		_res = 1;						\
		if (unlikely(get_desc < stop_thrs))			\
			_res = netif_txq_try_stop(txq, get_desc, start_thrs); \
		_res;							\
	})

/* Variant of netdev_tx_completed_queue() which guarantees smp_mb() if
 * @bytes != 0, regardless of kernel config.
 */
static inline void
netdev_txq_completed_mb(struct netdev_queue *dev_queue,
			unsigned int pkts, unsigned int bytes)
{
	if (IS_ENABLED(CONFIG_BQL))
		netdev_tx_completed_queue(dev_queue, pkts, bytes);
	else if (bytes)
		smp_mb();
}

/**
 * __netif_txq_completed_wake() - locklessly wake a Tx queue, if needed
 * @txq:	struct netdev_queue to stop/start
 * @pkts:	number of packets completed
 * @bytes:	number of bytes completed
 * @get_desc:	get current number of free descriptors (see requirements below!)
 * @start_thrs:	minimal number of descriptors to re-enable the queue
 * @down_cond:	down condition, predicate indicating that the queue should
 *		not be woken up even if descriptors are available
 *
 * All arguments may be evaluated multiple times.
 * @get_desc must be a formula or a function call, it must always
 * return up-to-date information when evaluated!
 * Reports completed pkts/bytes to BQL.
 *
 * Returns:
 *	 0 if the queue was woken up
 *	 1 if the queue was already enabled (or disabled but @down_cond is true)
 *	-1 if the queue was left unchanged (@start_thrs not reached)
 */
#define __netif_txq_completed_wake(txq, pkts, bytes,			\
				   get_desc, start_thrs, down_cond)	\
	({								\
		int _res;						\
									\
		/* Report to BQL and piggy back on its barrier.		\
		 * Barrier makes sure that anybody stopping the queue	\
		 * after this point sees the new consumer index.	\
		 * Pairs with barrier in netif_txq_try_stop().		\
		 */							\
		netdev_txq_completed_mb(txq, pkts, bytes);		\
									\
		_res = -1;						\
		if (pkts && likely(get_desc >= start_thrs)) {		\
			_res = 1;					\
			if (unlikely(netif_tx_queue_stopped(txq)) &&	\
			    !(down_cond)) {				\
				netif_tx_wake_queue(txq);		\
				_res = 0;				\
			}						\
		}							\
		_res;							\
	})

#define netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs) \
	__netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs, false)

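/*
 * Typical pairing of the two sides (an illustrative sketch; my_ring_free(),
 * MY_STOP_THRS and MY_START_THRS are hypothetical driver helpers/constants):
 *
 *	// In ndo_start_xmit(), after the producer index and any other
 *	// shared ring state have been updated:
 *	netif_txq_maybe_stop(txq, my_ring_free(ring),
 *			     MY_STOP_THRS, MY_START_THRS);
 *
 *	// In the NAPI poll completion path, after the consumer index has
 *	// been advanced past done_pkts / done_bytes worth of descriptors:
 *	netif_txq_completed_wake(txq, done_pkts, done_bytes,
 *				 my_ring_free(ring), MY_START_THRS);
 */
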
/* subqueue variants follow */

#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs)	\
	({								\
		struct netdev_queue *_txq;				\
									\
		_txq = netdev_get_tx_queue(dev, idx);			\
		netif_txq_try_stop(_txq, get_desc, start_thrs);		\
	})

static inline void netif_subqueue_sent(const struct net_device *dev,
				       unsigned int idx, unsigned int bytes)
{
	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(dev, idx);
	netdev_tx_sent_queue(txq, bytes);
}

#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
	({								\
		struct netdev_queue *_txq;				\
									\
		_txq = netdev_get_tx_queue(dev, idx);			\
		netif_txq_maybe_stop(_txq, get_desc, stop_thrs, start_thrs); \
	})

#define netif_subqueue_completed_wake(dev, idx, pkts, bytes,		\
				      get_desc, start_thrs)		\
	({								\
		struct netdev_queue *_txq;				\
									\
		_txq = netdev_get_tx_queue(dev, idx);			\
		netif_txq_completed_wake(_txq, pkts, bytes,		\
					 get_desc, start_thrs);		\
	})

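/*
 * The subqueue variants look the queue up by index, for drivers which track
 * queues by number rather than by struct netdev_queue pointer. Illustrative
 * sketch (my_ring_free() and the thresholds are hypothetical, as above):
 *
 *	// xmit path:
 *	netif_subqueue_maybe_stop(dev, ring->idx, my_ring_free(ring),
 *				  MY_STOP_THRS, MY_START_THRS);
 *
 *	// completion path:
 *	netif_subqueue_completed_wake(dev, ring->idx, done_pkts, done_bytes,
 *				      my_ring_free(ring), MY_START_THRS);
 */
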
struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx);

#endif
|---|