1// SPDX-License-Identifier: GPL-2.0-or-later
2/* A network driver using virtio.
3 *
4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
5 */
6//#define DEBUG
7#include <linux/netdevice.h>
8#include <linux/etherdevice.h>
9#include <linux/ethtool.h>
10#include <linux/module.h>
11#include <linux/virtio.h>
12#include <linux/virtio_net.h>
13#include <linux/bpf.h>
14#include <linux/bpf_trace.h>
15#include <linux/scatterlist.h>
16#include <linux/if_vlan.h>
17#include <linux/slab.h>
18#include <linux/cpu.h>
19#include <linux/average.h>
20#include <linux/filter.h>
21#include <linux/kernel.h>
22#include <linux/dim.h>
23#include <net/route.h>
24#include <net/xdp.h>
25#include <net/net_failover.h>
26#include <net/netdev_rx_queue.h>
27#include <net/netdev_queues.h>
28#include <net/xdp_sock_drv.h>
29
30static int napi_weight = NAPI_POLL_WEIGHT;
31module_param(napi_weight, int, 0444);
32
33static bool csum = true, gso = true, napi_tx = true;
34module_param(csum, bool, 0444);
35module_param(gso, bool, 0444);
36module_param(napi_tx, bool, 0644);
37
38#define VIRTIO_OFFLOAD_MAP_MIN 46
39#define VIRTIO_OFFLOAD_MAP_MAX 47
40#define VIRTIO_FEATURES_MAP_MIN 65
41#define VIRTIO_O2F_DELTA (VIRTIO_FEATURES_MAP_MIN - \
42 VIRTIO_OFFLOAD_MAP_MIN)
43
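/*
 * The two UDP tunnel GSO guest offloads correspond to device feature bits at
 * or above VIRTIO_FEATURES_MAP_MIN (65), which do not fit into the 64-bit
 * offload masks used below, so they are tracked as "mapped" bits 46 and 47
 * instead. With the values above VIRTIO_O2F_DELTA is 19, so for example
 * virtio_offload_to_feature(46) == 65 and virtio_offload_to_feature(47) == 66,
 * while every other bit is returned unchanged by the helpers below.
 */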
44static bool virtio_is_mapped_offload(unsigned int obit)
45{
46 return obit >= VIRTIO_OFFLOAD_MAP_MIN &&
47 obit <= VIRTIO_OFFLOAD_MAP_MAX;
48}
49
50static unsigned int virtio_offload_to_feature(unsigned int obit)
51{
52 return virtio_is_mapped_offload(obit) ? obit + VIRTIO_O2F_DELTA : obit;
53}
54
55/* FIXME: MTU in config. */
56#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
57#define GOOD_COPY_LEN 128
58
59#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
60
61/* Separating two types of XDP xmit */
62#define VIRTIO_XDP_TX BIT(0)
63#define VIRTIO_XDP_REDIR BIT(1)
64
65/* RX packet size EWMA. The average packet size is used to determine the packet
66 * buffer size when refilling RX rings. As the entire RX ring may be refilled
67 * at once, the weight is chosen so that the EWMA will be insensitive to short-
68 * term, transient changes in packet size.
69 */
70DECLARE_EWMA(pkt_len, 0, 64)
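/*
 * Note: the (0, 64) arguments give 0 bits of fractional precision and a weight
 * reciprocal of 64, i.e. each new sample moves the average by roughly 1/64th
 * of its distance from the current value.
 */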
71
72#define VIRTNET_DRIVER_VERSION "1.0.0"
73
74static const unsigned long guest_offloads[] = {
75 VIRTIO_NET_F_GUEST_TSO4,
76 VIRTIO_NET_F_GUEST_TSO6,
77 VIRTIO_NET_F_GUEST_ECN,
78 VIRTIO_NET_F_GUEST_UFO,
79 VIRTIO_NET_F_GUEST_CSUM,
80 VIRTIO_NET_F_GUEST_USO4,
81 VIRTIO_NET_F_GUEST_USO6,
82 VIRTIO_NET_F_GUEST_HDRLEN,
83 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED,
84 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED,
85};
86
87#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
88 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
89 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
90 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \
91 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
92 (1ULL << VIRTIO_NET_F_GUEST_USO6) | \
93 (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) | \
94 (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED))
95
96struct virtnet_stat_desc {
97 char desc[ETH_GSTRING_LEN];
98 size_t offset;
99 size_t qstat_offset;
100};
101
102struct virtnet_sq_free_stats {
103 u64 packets;
104 u64 bytes;
105 u64 napi_packets;
106 u64 napi_bytes;
107 u64 xsk;
108};
109
110struct virtnet_sq_stats {
111 struct u64_stats_sync syncp;
112 u64_stats_t packets;
113 u64_stats_t bytes;
114 u64_stats_t xdp_tx;
115 u64_stats_t xdp_tx_drops;
116 u64_stats_t kicks;
117 u64_stats_t tx_timeouts;
118 u64_stats_t stop;
119 u64_stats_t wake;
120};
121
122struct virtnet_rq_stats {
123 struct u64_stats_sync syncp;
124 u64_stats_t packets;
125 u64_stats_t bytes;
126 u64_stats_t drops;
127 u64_stats_t xdp_packets;
128 u64_stats_t xdp_tx;
129 u64_stats_t xdp_redirects;
130 u64_stats_t xdp_drops;
131 u64_stats_t kicks;
132};
133
134#define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1}
135#define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1}
136
137#define VIRTNET_SQ_STAT_QSTAT(name, m) \
138 { \
139 name, \
140 offsetof(struct virtnet_sq_stats, m), \
141 offsetof(struct netdev_queue_stats_tx, m), \
142 }
143
144#define VIRTNET_RQ_STAT_QSTAT(name, m) \
145 { \
146 name, \
147 offsetof(struct virtnet_rq_stats, m), \
148 offsetof(struct netdev_queue_stats_rx, m), \
149 }
150
151static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
152 VIRTNET_SQ_STAT("xdp_tx", xdp_tx),
153 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops),
154 VIRTNET_SQ_STAT("kicks", kicks),
155 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts),
156};
157
158static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
159 VIRTNET_RQ_STAT("drops", drops),
160 VIRTNET_RQ_STAT("xdp_packets", xdp_packets),
161 VIRTNET_RQ_STAT("xdp_tx", xdp_tx),
162 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects),
163 VIRTNET_RQ_STAT("xdp_drops", xdp_drops),
164 VIRTNET_RQ_STAT("kicks", kicks),
165};
166
167static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = {
168 VIRTNET_SQ_STAT_QSTAT("packets", packets),
169 VIRTNET_SQ_STAT_QSTAT("bytes", bytes),
170 VIRTNET_SQ_STAT_QSTAT("stop", stop),
171 VIRTNET_SQ_STAT_QSTAT("wake", wake),
172};
173
174static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = {
175 VIRTNET_RQ_STAT_QSTAT("packets", packets),
176 VIRTNET_RQ_STAT_QSTAT("bytes", bytes),
177};
178
179#define VIRTNET_STATS_DESC_CQ(name) \
180 {#name, offsetof(struct virtio_net_stats_cvq, name), -1}
181
182#define VIRTNET_STATS_DESC_RX(class, name) \
183 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1}
184
185#define VIRTNET_STATS_DESC_TX(class, name) \
186 {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1}
187
188
189static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
190 VIRTNET_STATS_DESC_CQ(command_num),
191 VIRTNET_STATS_DESC_CQ(ok_num),
192};
193
194static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
195 VIRTNET_STATS_DESC_RX(basic, packets),
196 VIRTNET_STATS_DESC_RX(basic, bytes),
197
198 VIRTNET_STATS_DESC_RX(basic, notifications),
199 VIRTNET_STATS_DESC_RX(basic, interrupts),
200};
201
202static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
203 VIRTNET_STATS_DESC_TX(basic, packets),
204 VIRTNET_STATS_DESC_TX(basic, bytes),
205
206 VIRTNET_STATS_DESC_TX(basic, notifications),
207 VIRTNET_STATS_DESC_TX(basic, interrupts),
208};
209
210static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
211 VIRTNET_STATS_DESC_RX(csum, needs_csum),
212};
213
214static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
215 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg),
216 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg),
217};
218
219static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
220 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes),
221};
222
223static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
224 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes),
225};
226
227#define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \
228 { \
229 #name, \
230 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \
231 offsetof(struct netdev_queue_stats_rx, qstat_field), \
232 }
233
234#define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \
235 { \
236 #name, \
237 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \
238 offsetof(struct netdev_queue_stats_tx, qstat_field), \
239 }
240
241static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = {
242 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops),
243 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns),
244};
245
246static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = {
247 VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops),
248 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors),
249};
250
251static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = {
252 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary),
253 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none),
254 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad),
255};
256
257static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = {
258 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none),
259 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum),
260};
261
262static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = {
263 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets),
264 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes),
265 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets),
266 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes),
267};
268
269static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = {
270 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets),
271 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes),
272 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets),
273 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes),
274};
275
276static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = {
277 VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
278};
279
280static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = {
281 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
282};
283
284#define VIRTNET_Q_TYPE_RX 0
285#define VIRTNET_Q_TYPE_TX 1
286#define VIRTNET_Q_TYPE_CQ 2
287
288struct virtnet_interrupt_coalesce {
289 u32 max_packets;
290 u32 max_usecs;
291};
292
/* DMA info shared by the rx buffers carved out of a single page_frag
 * allocation (see virtnet_rq_alloc()).
 */
294struct virtnet_rq_dma {
295 dma_addr_t addr;
296 u32 ref;
297 u16 len;
298 u16 need_sync;
299};
300
301/* Internal representation of a send virtqueue */
302struct send_queue {
	/* Virtqueue associated with this send_queue */
304 struct virtqueue *vq;
305
306 /* TX: fragments + linear part + virtio header */
307 struct scatterlist sg[MAX_SKB_FRAGS + 2];
308
309 /* Name of the send queue: output.$index */
310 char name[16];
311
312 struct virtnet_sq_stats stats;
313
314 struct virtnet_interrupt_coalesce intr_coal;
315
316 struct napi_struct napi;
317
318 /* Record whether sq is in reset state. */
319 bool reset;
320
321 struct xsk_buff_pool *xsk_pool;
322
323 dma_addr_t xsk_hdr_dma_addr;
324};
325
326/* Internal representation of a receive virtqueue */
327struct receive_queue {
328 /* Virtqueue associated with this receive_queue */
329 struct virtqueue *vq;
330
331 struct napi_struct napi;
332
333 struct bpf_prog __rcu *xdp_prog;
334
335 struct virtnet_rq_stats stats;
336
337 /* The number of rx notifications */
338 u16 calls;
339
340 /* Is dynamic interrupt moderation enabled? */
341 bool dim_enabled;
342
	/* Protects dim_enabled and intr_coal */
344 struct mutex dim_lock;
345
346 /* Dynamic Interrupt Moderation */
347 struct dim dim;
348
349 u32 packets_in_napi;
350
351 struct virtnet_interrupt_coalesce intr_coal;
352
353 /* Chain pages by the private ptr. */
354 struct page *pages;
355
356 /* Average packet length for mergeable receive buffers. */
357 struct ewma_pkt_len mrg_avg_pkt_len;
358
359 /* Page frag for packet buffer allocation. */
360 struct page_frag alloc_frag;
361
362 /* RX: fragments + linear part + virtio header */
363 struct scatterlist sg[MAX_SKB_FRAGS + 2];
364
365 /* Min single buffer size for mergeable buffers case. */
366 unsigned int min_buf_len;
367
368 /* Name of this receive queue: input.$index */
369 char name[16];
370
371 struct xdp_rxq_info xdp_rxq;
372
	/* Record the last dma info; it is freed once a new page has been allocated. */
374 struct virtnet_rq_dma *last_dma;
375
376 struct xsk_buff_pool *xsk_pool;
377
378 /* xdp rxq used by xsk */
379 struct xdp_rxq_info xsk_rxq_info;
380
381 struct xdp_buff **xsk_buffs;
382};
383
384#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
385
386/* Control VQ buffers: protected by the rtnl lock */
387struct control_buf {
388 struct virtio_net_ctrl_hdr hdr;
389 virtio_net_ctrl_ack status;
390};
391
392struct virtnet_info {
393 struct virtio_device *vdev;
394 struct virtqueue *cvq;
395 struct net_device *dev;
396 struct send_queue *sq;
397 struct receive_queue *rq;
398 unsigned int status;
399
400 /* Max # of queue pairs supported by the device */
401 u16 max_queue_pairs;
402
403 /* # of queue pairs currently used by the driver */
404 u16 curr_queue_pairs;
405
406 /* # of XDP queue pairs currently used by the driver */
407 u16 xdp_queue_pairs;
408
	/* xdp_queue_pairs may be 0 even when an XDP program is loaded,
	 * so track the XDP state separately.
	 */
410 bool xdp_enabled;
411
412 /* I like... big packets and I cannot lie! */
413 bool big_packets;
414
415 /* number of sg entries allocated for big packets */
416 unsigned int big_packets_num_skbfrags;
417
418 /* Host will merge rx buffers for big packets (shake it! shake it!) */
419 bool mergeable_rx_bufs;
420
421 /* Host supports rss and/or hash report */
422 bool has_rss;
423 bool has_rss_hash_report;
424 u8 rss_key_size;
425 u16 rss_indir_table_size;
426 u32 rss_hash_types_supported;
427 u32 rss_hash_types_saved;
428 struct virtio_net_rss_config_hdr *rss_hdr;
429 struct virtio_net_rss_config_trailer rss_trailer;
430 u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];
431
432 /* Has control virtqueue */
433 bool has_cvq;
434
435 /* Lock to protect the control VQ */
436 struct mutex cvq_lock;
437
438 /* Host can handle any s/g split between our header and packet data */
439 bool any_header_sg;
440
441 /* Packet virtio header size */
442 u8 hdr_len;
443
444 /* Work struct for delayed refilling if we run low on memory. */
445 struct delayed_work refill;
446
447 /* UDP tunnel support */
448 bool tx_tnl;
449
450 bool rx_tnl;
451
452 bool rx_tnl_csum;
453
454 /* Is delayed refill enabled? */
455 bool refill_enabled;
456
457 /* The lock to synchronize the access to refill_enabled */
458 spinlock_t refill_lock;
459
460 /* Work struct for config space updates */
461 struct work_struct config_work;
462
463 /* Work struct for setting rx mode */
464 struct work_struct rx_mode_work;
465
466 /* OK to queue work setting RX mode? */
467 bool rx_mode_work_enabled;
468
	/* Is the affinity hint set for the virtqueues? */
470 bool affinity_hint_set;
471
472 /* CPU hotplug instances for online & dead */
473 struct hlist_node node;
474 struct hlist_node node_dead;
475
476 struct control_buf *ctrl;
477
478 /* Ethtool settings */
479 u8 duplex;
480 u32 speed;
481
482 /* Is rx dynamic interrupt moderation enabled? */
483 bool rx_dim_enabled;
484
485 /* Interrupt coalescing settings */
486 struct virtnet_interrupt_coalesce intr_coal_tx;
487 struct virtnet_interrupt_coalesce intr_coal_rx;
488
489 unsigned long guest_offloads;
490 unsigned long guest_offloads_capable;
491
492 /* failover when STANDBY feature enabled */
493 struct failover *failover;
494
495 u64 device_stats_cap;
496};
497
498struct padded_vnet_hdr {
499 struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and the data sg buffer shares the
	 * same page with this header sg. The padding makes the next sg
	 * 16-byte aligned after the header.
	 */
505 char padding[12];
506};
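/*
 * For reference: sizeof(struct virtio_net_hdr_v1_hash) is 20 bytes, so the
 * 12 bytes of padding above bring the total to 32, which keeps the data sg
 * entry 16-byte aligned when it shares a page with the header.
 */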
507
508struct virtio_net_common_hdr {
509 union {
510 struct virtio_net_hdr hdr;
511 struct virtio_net_hdr_mrg_rxbuf mrg_hdr;
512 struct virtio_net_hdr_v1_hash hash_v1_hdr;
513 struct virtio_net_hdr_v1_hash_tunnel tnl_hdr;
514 };
515};
516
517static struct virtio_net_common_hdr xsk_hdr;
518
519static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
520static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq);
521static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
522 struct net_device *dev,
523 unsigned int *xdp_xmit,
524 struct virtnet_rq_stats *stats);
525static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
526 struct sk_buff *skb, u8 flags);
527static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
528 struct sk_buff *curr_skb,
529 struct page *page, void *buf,
530 int len, int truesize);
531static void virtnet_xsk_completed(struct send_queue *sq, int num);
532
533enum virtnet_xmit_type {
534 VIRTNET_XMIT_TYPE_SKB,
535 VIRTNET_XMIT_TYPE_SKB_ORPHAN,
536 VIRTNET_XMIT_TYPE_XDP,
537 VIRTNET_XMIT_TYPE_XSK,
538};
539
540static size_t virtnet_rss_hdr_size(const struct virtnet_info *vi)
541{
542 u16 indir_table_size = vi->has_rss ? vi->rss_indir_table_size : 1;
543
544 return struct_size(vi->rss_hdr, indirection_table, indir_table_size);
545}
546
547static size_t virtnet_rss_trailer_size(const struct virtnet_info *vi)
548{
549 return struct_size(&vi->rss_trailer, hash_key_data, vi->rss_key_size);
550}
551
552/* We use the last two bits of the pointer to distinguish the xmit type. */
553#define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1))
554
555#define VIRTIO_XSK_FLAG_OFFSET 2
556
557static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr)
558{
559 unsigned long p = (unsigned long)*ptr;
560
561 *ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK);
562
563 return p & VIRTNET_XMIT_TYPE_MASK;
564}
565
566static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type)
567{
568 return (void *)((unsigned long)ptr | type);
569}
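/*
 * Illustrative round trip: virtnet_xmit_ptr_pack(xdpf, VIRTNET_XMIT_TYPE_XDP)
 * sets the two low bits of the (at least 4-byte aligned) pointer to 0b10;
 * virtnet_xmit_ptr_unpack() clears them again and reports
 * VIRTNET_XMIT_TYPE_XDP, so both the original pointer and its type survive the
 * trip through the virtqueue token.
 */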
570
static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
			      enum virtnet_xmit_type type)
{
	return virtqueue_add_outbuf(sq->vq, sq->sg, num,
				    virtnet_xmit_ptr_pack(data, type),
				    GFP_ATOMIC);
}
578
579static u32 virtnet_ptr_to_xsk_buff_len(void *ptr)
580{
581 return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
582}
583
584static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
585{
586 sg_dma_address(sg) = addr;
587 sg_dma_len(sg) = len;
588}
589
590static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
591 bool in_napi, struct virtnet_sq_free_stats *stats)
592{
593 struct xdp_frame *frame;
594 struct sk_buff *skb;
595 unsigned int len;
596 void *ptr;
597
	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		switch (virtnet_xmit_ptr_unpack(&ptr)) {
		case VIRTNET_XMIT_TYPE_SKB:
			skb = ptr;

			pr_debug("Sent skb %p\n", skb);
			stats->napi_packets++;
			stats->napi_bytes += skb->len;
			napi_consume_skb(skb, in_napi);
			break;

		case VIRTNET_XMIT_TYPE_SKB_ORPHAN:
			skb = ptr;

			stats->packets++;
			stats->bytes += skb->len;
			napi_consume_skb(skb, in_napi);
			break;

		case VIRTNET_XMIT_TYPE_XDP:
			frame = ptr;

			stats->packets++;
			stats->bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);
			break;

		case VIRTNET_XMIT_TYPE_XSK:
			stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr);
			stats->xsk++;
			break;
		}
	}
	netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
632}
633
634static void virtnet_free_old_xmit(struct send_queue *sq,
635 struct netdev_queue *txq,
636 bool in_napi,
637 struct virtnet_sq_free_stats *stats)
638{
639 __free_old_xmit(sq, txq, in_napi, stats);
640
641 if (stats->xsk)
		virtnet_xsk_completed(sq, stats->xsk);
643}
644
645/* Converting between virtqueue no. and kernel tx/rx queue no.
646 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
647 */
648static int vq2txq(struct virtqueue *vq)
649{
650 return (vq->index - 1) / 2;
651}
652
653static int txq2vq(int txq)
654{
655 return txq * 2 + 1;
656}
657
658static int vq2rxq(struct virtqueue *vq)
659{
660 return vq->index / 2;
661}
662
663static int rxq2vq(int rxq)
664{
665 return rxq * 2;
666}
667
668static int vq_type(struct virtnet_info *vi, int qid)
669{
670 if (qid == vi->max_queue_pairs * 2)
671 return VIRTNET_Q_TYPE_CQ;
672
673 if (qid % 2)
674 return VIRTNET_Q_TYPE_TX;
675
676 return VIRTNET_Q_TYPE_RX;
677}
678
679static inline struct virtio_net_common_hdr *
680skb_vnet_common_hdr(struct sk_buff *skb)
681{
682 return (struct virtio_net_common_hdr *)skb->cb;
683}
684
/*
 * private is used to chain pages for big packets; put the whole
 * most recently used list at the beginning for reuse.
 */
689static void give_pages(struct receive_queue *rq, struct page *page)
690{
691 struct page *end;
692
693 /* Find end of list, sew whole thing into vi->rq.pages. */
694 for (end = page; end->private; end = (struct page *)end->private);
695 end->private = (unsigned long)rq->pages;
696 rq->pages = page;
697}
698
699static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
700{
701 struct page *p = rq->pages;
702
703 if (p) {
704 rq->pages = (struct page *)p->private;
705 /* clear private here, it is used to chain pages */
706 p->private = 0;
707 } else
708 p = alloc_page(gfp_mask);
709 return p;
710}
711
static void virtnet_rq_free_buf(struct virtnet_info *vi,
				struct receive_queue *rq, void *buf)
{
	if (vi->mergeable_rx_bufs)
		put_page(virt_to_head_page(buf));
	else if (vi->big_packets)
		give_pages(rq, buf);
	else
		put_page(virt_to_head_page(buf));
}
722
static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}
736
737static void enable_rx_mode_work(struct virtnet_info *vi)
738{
739 rtnl_lock();
740 vi->rx_mode_work_enabled = true;
741 rtnl_unlock();
742}
743
744static void disable_rx_mode_work(struct virtnet_info *vi)
745{
746 rtnl_lock();
747 vi->rx_mode_work_enabled = false;
748 rtnl_unlock();
749}
750
static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		virtqueue_disable_cb(vq);
		__napi_schedule(napi);
	}
}
759
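/* Prepare to re-enable virtqueue callbacks before completing NAPI; if
 * virtqueue_poll() then reports that more buffers arrived in that window,
 * reschedule NAPI right away instead of risking a missed interrupt.
 */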
760static bool virtqueue_napi_complete(struct napi_struct *napi,
761 struct virtqueue *vq, int processed)
762{
763 int opaque;
764
765 opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed)) {
767 if (unlikely(virtqueue_poll(vq, opaque)))
768 virtqueue_napi_schedule(napi, vq);
769 else
770 return true;
771 } else {
772 virtqueue_disable_cb(vq);
773 }
774
775 return false;
776}
777
778static void skb_xmit_done(struct virtqueue *vq)
779{
780 struct virtnet_info *vi = vq->vdev->priv;
781 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;
782
783 /* Suppress further interrupts. */
784 virtqueue_disable_cb(vq);
785
786 if (napi->weight)
787 virtqueue_napi_schedule(napi, vq);
788 else
789 /* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
791}
792
793#define MRG_CTX_HEADER_SHIFT 22
794static void *mergeable_len_to_ctx(unsigned int truesize,
795 unsigned int headroom)
796{
797 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
798}
799
800static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
801{
802 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
803}
804
805static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
806{
807 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
808}
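/*
 * Example encoding: a buffer with truesize 2048 and 256 bytes of headroom is
 * stored as the context value (256 << 22) | 2048; the helpers above recover
 * 256 and 2048 respectively. truesize is therefore limited to 22 bits.
 */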
809
810static int check_mergeable_len(struct net_device *dev, void *mrg_ctx,
811 unsigned int len)
812{
813 unsigned int headroom, tailroom, room, truesize;
814
815 truesize = mergeable_ctx_to_truesize(mrg_ctx);
816 headroom = mergeable_ctx_to_headroom(mrg_ctx);
817 tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
818 room = SKB_DATA_ALIGN(headroom + tailroom);
819
820 if (len > truesize - room) {
821 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
822 dev->name, len, (unsigned long)(truesize - room));
823 DEV_STATS_INC(dev, rx_length_errors);
824 return -1;
825 }
826
827 return 0;
828}
829
830static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
831 unsigned int headroom,
832 unsigned int len)
833{
834 struct sk_buff *skb;
835
836 skb = build_skb(data: buf, frag_size: buflen);
837 if (unlikely(!skb))
838 return NULL;
839
840 skb_reserve(skb, len: headroom);
841 skb_put(skb, len);
842
843 return skb;
844}
845
846/* Called from bottom half context */
847static struct sk_buff *page_to_skb(struct virtnet_info *vi,
848 struct receive_queue *rq,
849 struct page *page, unsigned int offset,
850 unsigned int len, unsigned int truesize,
851 unsigned int headroom)
852{
853 struct sk_buff *skb;
854 struct virtio_net_common_hdr *hdr;
855 unsigned int copy, hdr_len, hdr_padded_len;
856 struct page *page_to_free = NULL;
857 int tailroom, shinfo_size;
858 char *p, *hdr_p, *buf;
859
860 p = page_address(page) + offset;
861 hdr_p = p;
862
863 hdr_len = vi->hdr_len;
864 if (vi->mergeable_rx_bufs)
865 hdr_padded_len = hdr_len;
866 else
867 hdr_padded_len = sizeof(struct padded_vnet_hdr);
868
869 buf = p - headroom;
870 len -= hdr_len;
871 offset += hdr_padded_len;
872 p += hdr_padded_len;
873 tailroom = truesize - headroom - hdr_padded_len - len;
874
875 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
876
877 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
878 skb = virtnet_build_skb(buf, buflen: truesize, headroom: p - buf, len);
879 if (unlikely(!skb))
880 return NULL;
881
882 page = (struct page *)page->private;
883 if (page)
884 give_pages(rq, page);
885 goto ok;
886 }
887
888 /* copy small packet so we can reuse these pages for small data */
889 skb = napi_alloc_skb(napi: &rq->napi, GOOD_COPY_LEN);
890 if (unlikely(!skb))
891 return NULL;
892
893 /* Copy all frame if it fits skb->head, otherwise
894 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
895 */
896 if (len <= skb_tailroom(skb))
897 copy = len;
898 else
899 copy = ETH_HLEN;
900 skb_put_data(skb, data: p, len: copy);
901
902 len -= copy;
903 offset += copy;
904
905 if (vi->mergeable_rx_bufs) {
906 if (len)
907 skb_add_rx_frag(skb, i: 0, page, off: offset, size: len, truesize);
908 else
909 page_to_free = page;
910 goto ok;
911 }
912
913 /*
914 * Verify that we can indeed put this data into a skb.
915 * This is here to handle cases when the device erroneously
916 * tries to receive more than is possible. This is usually
917 * the case of a broken device.
918 */
919 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
920 net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
921 dev_kfree_skb(skb);
922 return NULL;
923 }
924 BUG_ON(offset >= PAGE_SIZE);
925 while (len) {
926 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
927 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, off: offset,
928 size: frag_size, truesize);
929 len -= frag_size;
930 page = (struct page *)page->private;
931 offset = 0;
932 }
933
934 if (page)
935 give_pages(rq, page);
936
937ok:
938 hdr = skb_vnet_common_hdr(skb);
939 memcpy(to: hdr, from: hdr_p, len: hdr_len);
940 if (page_to_free)
941 put_page(page: page_to_free);
942
943 return skb;
944}
945
946static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
947{
948 struct virtnet_info *vi = rq->vq->vdev->priv;
949 struct page *page = virt_to_head_page(x: buf);
950 struct virtnet_rq_dma *dma;
951 void *head;
952 int offset;
953
954 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);
955
956 head = page_address(page);
957
958 dma = head;
959
960 --dma->ref;
961
962 if (dma->need_sync && len) {
963 offset = buf - (head + sizeof(*dma));
964
965 virtqueue_map_sync_single_range_for_cpu(vq: rq->vq, addr: dma->addr,
966 offset, size: len,
967 dir: DMA_FROM_DEVICE);
968 }
969
970 if (dma->ref)
971 return;
972
973 virtqueue_unmap_single_attrs(vq: rq->vq, addr: dma->addr, size: dma->len,
974 dir: DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
975 put_page(page);
976}
977
978static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
979{
980 struct virtnet_info *vi = rq->vq->vdev->priv;
981 void *buf;
982
983 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);
984
985 buf = virtqueue_get_buf_ctx(vq: rq->vq, len, ctx);
986 if (buf)
987 virtnet_rq_unmap(rq, buf, len: *len);
988
989 return buf;
990}
991
992static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
993{
994 struct virtnet_info *vi = rq->vq->vdev->priv;
995 struct virtnet_rq_dma *dma;
996 dma_addr_t addr;
997 u32 offset;
998 void *head;
999
1000 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);
1001
1002 head = page_address(rq->alloc_frag.page);
1003
1004 offset = buf - head;
1005
1006 dma = head;
1007
1008 addr = dma->addr - sizeof(*dma) + offset;
1009
1010 sg_init_table(rq->sg, 1);
1011 sg_fill_dma(sg: rq->sg, addr, len);
1012}
1013
1014static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
1015{
1016 struct page_frag *alloc_frag = &rq->alloc_frag;
1017 struct virtnet_info *vi = rq->vq->vdev->priv;
1018 struct virtnet_rq_dma *dma;
1019 void *buf, *head;
1020 dma_addr_t addr;
1021
1022 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);
1023
1024 head = page_address(alloc_frag->page);
1025
1026 dma = head;
1027
1028 /* new pages */
1029 if (!alloc_frag->offset) {
		if (rq->last_dma) {
			/* Now that a new page has been allocated, the last
			 * dma will no longer be used, so it can be unmapped
			 * once its ref count drops to 0.
			 */
			virtnet_rq_unmap(rq, rq->last_dma, 0);
			rq->last_dma = NULL;
		}
1038
1039 dma->len = alloc_frag->size - sizeof(*dma);
1040
1041 addr = virtqueue_map_single_attrs(vq: rq->vq, ptr: dma + 1,
1042 size: dma->len, dir: DMA_FROM_DEVICE, attrs: 0);
1043 if (virtqueue_map_mapping_error(vq: rq->vq, addr))
1044 return NULL;
1045
1046 dma->addr = addr;
1047 dma->need_sync = virtqueue_map_need_sync(vq: rq->vq, addr);
1048
1049 /* Add a reference to dma to prevent the entire dma from
1050 * being released during error handling. This reference
1051 * will be freed after the pages are no longer used.
1052 */
1053 get_page(page: alloc_frag->page);
1054 dma->ref = 1;
1055 alloc_frag->offset = sizeof(*dma);
1056
1057 rq->last_dma = dma;
1058 }
1059
1060 ++dma->ref;
1061
1062 buf = head + alloc_frag->offset;
1063
1064 get_page(page: alloc_frag->page);
1065 alloc_frag->offset += size;
1066
1067 return buf;
1068}
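/*
 * Sketch of an rx page_frag page under the premapped scheme set up above:
 *
 *	| struct virtnet_rq_dma | buf 0 | buf 1 | ... |
 *
 * The region after the struct is mapped once as a whole; each buffer is handed
 * to the device at dma->addr plus its offset (see virtnet_rq_init_one_sg())
 * and the mapping is only torn down in virtnet_rq_unmap() when dma->ref drops
 * to zero.
 */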
1069
1070static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
1071{
1072 struct virtnet_info *vi = vq->vdev->priv;
1073 struct receive_queue *rq;
1074 int i = vq2rxq(vq);
1075
1076 rq = &vi->rq[i];
1077
1078 if (rq->xsk_pool) {
1079 xsk_buff_free(xdp: (struct xdp_buff *)buf);
1080 return;
1081 }
1082
1083 if (!vi->big_packets || vi->mergeable_rx_bufs)
1084 virtnet_rq_unmap(rq, buf, len: 0);
1085
1086 virtnet_rq_free_buf(vi, rq, buf);
1087}
1088
static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			  bool in_napi)
{
	struct virtnet_sq_free_stats stats = {0};

	virtnet_free_old_xmit(sq, txq, in_napi, &stats);

	/* Avoid the overhead of a stats update when no packets have been
	 * processed, which happens when this is called speculatively from
	 * start_xmit.
	 */
	if (!stats.packets && !stats.napi_packets)
		return;

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
	u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
	u64_stats_update_end(&sq->stats.syncp);
}
1107
1108static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
1109{
1110 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
1111 return false;
1112 else if (q < vi->curr_queue_pairs)
1113 return true;
1114 else
1115 return false;
1116}
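/* e.g. with curr_queue_pairs == 4 and xdp_queue_pairs == 2, queues 2 and 3
 * are the XDP-only raw buffer queues and are the only ones for which the
 * check above returns true.
 */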
1117
1118static bool tx_may_stop(struct virtnet_info *vi,
1119 struct net_device *dev,
1120 struct send_queue *sq)
1121{
1122 int qnum;
1123
1124 qnum = sq - vi->sq;
1125
1126 /* If running out of space, stop queue to avoid getting packets that we
1127 * are then unable to transmit.
1128 * An alternative would be to force queuing layer to requeue the skb by
1129 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
1130 * returned in a normal path of operation: it means that driver is not
1131 * maintaining the TX queue stop/start state properly, and causes
1132 * the stack to do a non-trivial amount of useless work.
1133 * Since most packets only take 1 or 2 ring slots, stopping the queue
1134 * early means 16 slots are typically wasted.
1135 */
1136 if (sq->vq->num_free < MAX_SKB_FRAGS + 2) {
1137 struct netdev_queue *txq = netdev_get_tx_queue(dev, index: qnum);
1138
1139 netif_tx_stop_queue(dev_queue: txq);
1140 u64_stats_update_begin(syncp: &sq->stats.syncp);
1141 u64_stats_inc(p: &sq->stats.stop);
1142 u64_stats_update_end(syncp: &sq->stats.syncp);
1143
1144 return true;
1145 }
1146
1147 return false;
1148}
1149
1150static void check_sq_full_and_disable(struct virtnet_info *vi,
1151 struct net_device *dev,
1152 struct send_queue *sq)
1153{
1154 bool use_napi = sq->napi.weight;
1155 int qnum;
1156
1157 qnum = sq - vi->sq;
1158
1159 if (tx_may_stop(vi, dev, sq)) {
1160 struct netdev_queue *txq = netdev_get_tx_queue(dev, index: qnum);
1161
1162 if (use_napi) {
1163 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
1164 virtqueue_napi_schedule(napi: &sq->napi, vq: sq->vq);
1165 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
1166 /* More just got used, free them then recheck. */
1167 free_old_xmit(sq, txq, in_napi: false);
1168 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
1169 netif_start_subqueue(dev, queue_index: qnum);
1170 u64_stats_update_begin(syncp: &sq->stats.syncp);
1171 u64_stats_inc(p: &sq->stats.wake);
1172 u64_stats_update_end(syncp: &sq->stats.syncp);
1173 virtqueue_disable_cb(vq: sq->vq);
1174 }
1175 }
1176 }
1177}
1178
1179/* Note that @len is the length of received data without virtio header */
1180static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
1181 struct receive_queue *rq, void *buf,
1182 u32 len, bool first_buf)
1183{
1184 struct xdp_buff *xdp;
1185 u32 bufsize;
1186
1187 xdp = (struct xdp_buff *)buf;
1188
	/* In virtnet_add_recvbuf_xsk(), part of XDP_PACKET_HEADROOM is used
	 * for the virtio header and the vhost is asked to fill data starting
	 * at hard_start + XDP_PACKET_HEADROOM - vi->hdr_len.
	 * The first buffer carries the virtio header, so the region left for
	 * frame data is
	 *	xsk_pool_get_rx_frame_size()
	 * Buffers other than the first one carry no virtio header, so their
	 * maximum frame data length is
	 *	xsk_pool_get_rx_frame_size() + vi->hdr_len
	 */
1199 bufsize = xsk_pool_get_rx_frame_size(pool: rq->xsk_pool);
1200 if (!first_buf)
1201 bufsize += vi->hdr_len;
1202
1203 if (unlikely(len > bufsize)) {
1204 pr_debug("%s: rx error: len %u exceeds truesize %u\n",
1205 vi->dev->name, len, bufsize);
1206 DEV_STATS_INC(vi->dev, rx_length_errors);
1207 xsk_buff_free(xdp);
1208 return NULL;
1209 }
1210
1211 if (first_buf) {
1212 xsk_buff_set_size(xdp, size: len);
1213 } else {
1214 xdp_prepare_buff(xdp, hard_start: xdp->data_hard_start,
1215 XDP_PACKET_HEADROOM - vi->hdr_len, data_len: len, meta_valid: 1);
1216 xdp->flags = 0;
1217 }
1218
1219 xsk_buff_dma_sync_for_cpu(xdp);
1220
1221 return xdp;
1222}
1223
1224static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
1225 struct xdp_buff *xdp)
1226{
1227 unsigned int metasize = xdp->data - xdp->data_meta;
1228 struct sk_buff *skb;
1229 unsigned int size;
1230
1231 size = xdp->data_end - xdp->data_hard_start;
1232 skb = napi_alloc_skb(napi: &rq->napi, length: size);
1233 if (unlikely(!skb)) {
1234 xsk_buff_free(xdp);
1235 return NULL;
1236 }
1237
1238 skb_reserve(skb, len: xdp->data_meta - xdp->data_hard_start);
1239
1240 size = xdp->data_end - xdp->data_meta;
1241 memcpy(to: __skb_put(skb, len: size), from: xdp->data_meta, len: size);
1242
1243 if (metasize) {
1244 __skb_pull(skb, len: metasize);
1245 skb_metadata_set(skb, meta_len: metasize);
1246 }
1247
1248 xsk_buff_free(xdp);
1249
1250 return skb;
1251}
1252
1253static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
1254 struct receive_queue *rq, struct xdp_buff *xdp,
1255 unsigned int *xdp_xmit,
1256 struct virtnet_rq_stats *stats)
1257{
1258 struct bpf_prog *prog;
1259 u32 ret;
1260
1261 ret = XDP_PASS;
1262 rcu_read_lock();
1263 prog = rcu_dereference(rq->xdp_prog);
1264 if (prog)
1265 ret = virtnet_xdp_handler(xdp_prog: prog, xdp, dev, xdp_xmit, stats);
1266 rcu_read_unlock();
1267
1268 switch (ret) {
1269 case XDP_PASS:
1270 return xsk_construct_skb(rq, xdp);
1271
1272 case XDP_TX:
1273 case XDP_REDIRECT:
1274 return NULL;
1275
1276 default:
1277 /* drop packet */
1278 xsk_buff_free(xdp);
1279 u64_stats_inc(p: &stats->drops);
1280 return NULL;
1281 }
1282}
1283
1284static void xsk_drop_follow_bufs(struct net_device *dev,
1285 struct receive_queue *rq,
1286 u32 num_buf,
1287 struct virtnet_rq_stats *stats)
1288{
1289 struct xdp_buff *xdp;
1290 u32 len;
1291
1292 while (num_buf-- > 1) {
1293 xdp = virtqueue_get_buf(vq: rq->vq, len: &len);
1294 if (unlikely(!xdp)) {
1295 pr_debug("%s: rx error: %d buffers missing\n",
1296 dev->name, num_buf);
1297 DEV_STATS_INC(dev, rx_length_errors);
1298 break;
1299 }
1300 u64_stats_add(p: &stats->bytes, val: len);
1301 xsk_buff_free(xdp);
1302 }
1303}
1304
1305static int xsk_append_merge_buffer(struct virtnet_info *vi,
1306 struct receive_queue *rq,
1307 struct sk_buff *head_skb,
1308 u32 num_buf,
1309 struct virtio_net_hdr_mrg_rxbuf *hdr,
1310 struct virtnet_rq_stats *stats)
1311{
1312 struct sk_buff *curr_skb;
1313 struct xdp_buff *xdp;
1314 u32 len, truesize;
1315 struct page *page;
1316 void *buf;
1317
1318 curr_skb = head_skb;
1319
1320 while (--num_buf) {
1321 buf = virtqueue_get_buf(vq: rq->vq, len: &len);
1322 if (unlikely(!buf)) {
1323 pr_debug("%s: rx error: %d buffers out of %d missing\n",
1324 vi->dev->name, num_buf,
1325 virtio16_to_cpu(vi->vdev,
1326 hdr->num_buffers));
1327 DEV_STATS_INC(vi->dev, rx_length_errors);
1328 return -EINVAL;
1329 }
1330
1331 u64_stats_add(p: &stats->bytes, val: len);
1332
1333 xdp = buf_to_xdp(vi, rq, buf, len, first_buf: false);
1334 if (!xdp)
1335 goto err;
1336
1337 buf = napi_alloc_frag(fragsz: len);
1338 if (!buf) {
1339 xsk_buff_free(xdp);
1340 goto err;
1341 }
1342
1343 memcpy(to: buf, from: xdp->data, len);
1344
1345 xsk_buff_free(xdp);
1346
1347 page = virt_to_page(buf);
1348
1349 truesize = len;
1350
1351 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
1352 buf, len, truesize);
1353 if (!curr_skb) {
1354 put_page(page);
1355 goto err;
1356 }
1357 }
1358
1359 return 0;
1360
1361err:
1362 xsk_drop_follow_bufs(dev: vi->dev, rq, num_buf, stats);
1363 return -EINVAL;
1364}
1365
1366static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
1367 struct receive_queue *rq, struct xdp_buff *xdp,
1368 unsigned int *xdp_xmit,
1369 struct virtnet_rq_stats *stats)
1370{
1371 struct virtio_net_hdr_mrg_rxbuf *hdr;
1372 struct bpf_prog *prog;
1373 struct sk_buff *skb;
1374 u32 ret, num_buf;
1375
1376 hdr = xdp->data - vi->hdr_len;
1377 num_buf = virtio16_to_cpu(vdev: vi->vdev, val: hdr->num_buffers);
1378
1379 ret = XDP_PASS;
1380 rcu_read_lock();
1381 prog = rcu_dereference(rq->xdp_prog);
1382 /* TODO: support multi buffer. */
1383 if (prog && num_buf == 1)
1384 ret = virtnet_xdp_handler(xdp_prog: prog, xdp, dev, xdp_xmit, stats);
1385 rcu_read_unlock();
1386
1387 switch (ret) {
1388 case XDP_PASS:
1389 skb = xsk_construct_skb(rq, xdp);
1390 if (!skb)
1391 goto drop_bufs;
1392
1393 if (xsk_append_merge_buffer(vi, rq, head_skb: skb, num_buf, hdr, stats)) {
1394 dev_kfree_skb(skb);
1395 goto drop;
1396 }
1397
1398 return skb;
1399
1400 case XDP_TX:
1401 case XDP_REDIRECT:
1402 return NULL;
1403
1404 default:
1405 /* drop packet */
1406 xsk_buff_free(xdp);
1407 }
1408
1409drop_bufs:
1410 xsk_drop_follow_bufs(dev, rq, num_buf, stats);
1411
1412drop:
1413 u64_stats_inc(p: &stats->drops);
1414 return NULL;
1415}
1416
1417static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
1418 void *buf, u32 len,
1419 unsigned int *xdp_xmit,
1420 struct virtnet_rq_stats *stats)
1421{
1422 struct net_device *dev = vi->dev;
1423 struct sk_buff *skb = NULL;
1424 struct xdp_buff *xdp;
1425 u8 flags;
1426
1427 len -= vi->hdr_len;
1428
1429 u64_stats_add(p: &stats->bytes, val: len);
1430
1431 xdp = buf_to_xdp(vi, rq, buf, len, first_buf: true);
1432 if (!xdp)
1433 return;
1434
1435 if (unlikely(len < ETH_HLEN)) {
1436 pr_debug("%s: short packet %i\n", dev->name, len);
1437 DEV_STATS_INC(dev, rx_length_errors);
1438 xsk_buff_free(xdp);
1439 return;
1440 }
1441
1442 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;
1443
1444 if (!vi->mergeable_rx_bufs)
1445 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
1446 else
1447 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats);
1448
1449 if (skb)
1450 virtnet_receive_done(vi, rq, skb, flags);
1451}
1452
1453static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq,
1454 struct xsk_buff_pool *pool, gfp_t gfp)
1455{
1456 struct xdp_buff **xsk_buffs;
1457 dma_addr_t addr;
1458 int err = 0;
1459 u32 len, i;
1460 int num;
1461
1462 xsk_buffs = rq->xsk_buffs;
1463
1464 num = xsk_buff_alloc_batch(pool, xdp: xsk_buffs, max: rq->vq->num_free);
1465 if (!num)
1466 return -ENOMEM;
1467
1468 len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
1469
1470 for (i = 0; i < num; ++i) {
		/* Use part of XDP_PACKET_HEADROOM as the virtnet hdr space.
		 * We assume XDP_PACKET_HEADROOM is larger than vi->hdr_len
		 * (see virtnet_xsk_pool_enable()).
		 */
1475 addr = xsk_buff_xdp_get_dma(xdp: xsk_buffs[i]) - vi->hdr_len;
1476
1477 sg_init_table(rq->sg, 1);
1478 sg_fill_dma(sg: rq->sg, addr, len);
1479
1480 err = virtqueue_add_inbuf_premapped(vq: rq->vq, sg: rq->sg, num: 1,
1481 data: xsk_buffs[i], NULL, gfp);
1482 if (err)
1483 goto err;
1484 }
1485
1486 return num;
1487
1488err:
1489 for (; i < num; ++i)
1490 xsk_buff_free(xdp: xsk_buffs[i]);
1491
1492 return err;
1493}
1494
1495static void *virtnet_xsk_to_ptr(u32 len)
1496{
1497 unsigned long p;
1498
1499 p = len << VIRTIO_XSK_FLAG_OFFSET;
1500
1501 return virtnet_xmit_ptr_pack(ptr: (void *)p, type: VIRTNET_XMIT_TYPE_XSK);
1502}
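/*
 * Example: for a 1500 byte descriptor the token is (1500 << 2) |
 * VIRTNET_XMIT_TYPE_XSK; virtnet_xmit_ptr_unpack() recovers the XSK type and
 * virtnet_ptr_to_xsk_buff_len() recovers 1500 when the buffer completes in
 * __free_old_xmit().
 */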
1503
1504static int virtnet_xsk_xmit_one(struct send_queue *sq,
1505 struct xsk_buff_pool *pool,
1506 struct xdp_desc *desc)
1507{
1508 struct virtnet_info *vi;
1509 dma_addr_t addr;
1510
1511 vi = sq->vq->vdev->priv;
1512
1513 addr = xsk_buff_raw_get_dma(pool, addr: desc->addr);
1514 xsk_buff_raw_dma_sync_for_device(pool, dma: addr, size: desc->len);
1515
1516 sg_init_table(sq->sg, 2);
1517 sg_fill_dma(sg: sq->sg, addr: sq->xsk_hdr_dma_addr, len: vi->hdr_len);
1518 sg_fill_dma(sg: sq->sg + 1, addr, len: desc->len);
1519
1520 return virtqueue_add_outbuf_premapped(vq: sq->vq, sg: sq->sg, num: 2,
1521 data: virtnet_xsk_to_ptr(len: desc->len),
1522 GFP_ATOMIC);
1523}
1524
1525static int virtnet_xsk_xmit_batch(struct send_queue *sq,
1526 struct xsk_buff_pool *pool,
1527 unsigned int budget,
1528 u64 *kicks)
1529{
1530 struct xdp_desc *descs = pool->tx_descs;
1531 bool kick = false;
1532 u32 nb_pkts, i;
1533 int err;
1534
1535 budget = min_t(u32, budget, sq->vq->num_free);
1536
1537 nb_pkts = xsk_tx_peek_release_desc_batch(pool, max: budget);
1538 if (!nb_pkts)
1539 return 0;
1540
1541 for (i = 0; i < nb_pkts; i++) {
1542 err = virtnet_xsk_xmit_one(sq, pool, desc: &descs[i]);
1543 if (unlikely(err)) {
1544 xsk_tx_completed(pool: sq->xsk_pool, nb_entries: nb_pkts - i);
1545 break;
1546 }
1547
1548 kick = true;
1549 }
1550
1551 if (kick && virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq))
1552 (*kicks)++;
1553
1554 return i;
1555}
1556
1557static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
1558 int budget)
1559{
1560 struct virtnet_info *vi = sq->vq->vdev->priv;
1561 struct virtnet_sq_free_stats stats = {};
1562 struct net_device *dev = vi->dev;
1563 u64 kicks = 0;
1564 int sent;
1565
	/* Avoid waking up napi needlessly, so call __free_old_xmit() instead
	 * of free_old_xmit().
	 */
	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);

	if (stats.xsk)
		xsk_tx_completed(sq->xsk_pool, stats.xsk);
1573
1574 sent = virtnet_xsk_xmit_batch(sq, pool, budget, kicks: &kicks);
1575
1576 if (!is_xdp_raw_buffer_queue(vi, q: sq - vi->sq))
1577 check_sq_full_and_disable(vi, dev: vi->dev, sq);
1578
1579 if (sent) {
1580 struct netdev_queue *txq;
1581
1582 txq = netdev_get_tx_queue(dev: vi->dev, index: sq - vi->sq);
1583 txq_trans_cond_update(txq);
1584 }
1585
1586 u64_stats_update_begin(syncp: &sq->stats.syncp);
1587 u64_stats_add(p: &sq->stats.packets, val: stats.packets);
1588 u64_stats_add(p: &sq->stats.bytes, val: stats.bytes);
1589 u64_stats_add(p: &sq->stats.kicks, val: kicks);
1590 u64_stats_add(p: &sq->stats.xdp_tx, val: sent);
1591 u64_stats_update_end(syncp: &sq->stats.syncp);
1592
1593 if (xsk_uses_need_wakeup(pool))
1594 xsk_set_tx_need_wakeup(pool);
1595
1596 return sent;
1597}
1598
1599static void xsk_wakeup(struct send_queue *sq)
1600{
1601 if (napi_if_scheduled_mark_missed(n: &sq->napi))
1602 return;
1603
1604 local_bh_disable();
1605 virtqueue_napi_schedule(napi: &sq->napi, vq: sq->vq);
1606 local_bh_enable();
1607}
1608
1609static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
1610{
1611 struct virtnet_info *vi = netdev_priv(dev);
1612 struct send_queue *sq;
1613
1614 if (!netif_running(dev))
1615 return -ENETDOWN;
1616
1617 if (qid >= vi->curr_queue_pairs)
1618 return -EINVAL;
1619
1620 sq = &vi->sq[qid];
1621
1622 xsk_wakeup(sq);
1623 return 0;
1624}
1625
static void virtnet_xsk_completed(struct send_queue *sq, int num)
{
	xsk_tx_completed(sq->xsk_pool, num);

	/* If this is called from rx poll, start_xmit or xdp xmit, we should
	 * wake up the tx napi to consume the xsk tx queue, because the tx
	 * interrupt may not be triggered.
	 */
	xsk_wakeup(sq);
}
1636
1637static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
1638 struct send_queue *sq,
1639 struct xdp_frame *xdpf)
1640{
1641 struct virtio_net_hdr_mrg_rxbuf *hdr;
1642 struct skb_shared_info *shinfo;
1643 u8 nr_frags = 0;
1644 int err, i;
1645
1646 if (unlikely(xdpf->headroom < vi->hdr_len))
1647 return -EOVERFLOW;
1648
1649 if (unlikely(xdp_frame_has_frags(xdpf))) {
1650 shinfo = xdp_get_shared_info_from_frame(frame: xdpf);
1651 nr_frags = shinfo->nr_frags;
1652 }
1653
	/* The wrapping function virtnet_xdp_xmit() frees up the pending old
	 * buffers, where the position of skb_shared_info is calculated in
	 * xdp_get_frame_len() and xdp_return_frame() based on xdpf->data and
	 * xdpf->headroom. Therefore, the headroom value needs to be updated
	 * synchronously here.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
1666 xdpf->len += vi->hdr_len;
1667
1668 sg_init_table(sq->sg, nr_frags + 1);
1669 sg_set_buf(sg: sq->sg, buf: xdpf->data, buflen: xdpf->len);
1670 for (i = 0; i < nr_frags; i++) {
1671 skb_frag_t *frag = &shinfo->frags[i];
1672
1673 sg_set_page(sg: &sq->sg[i + 1], page: skb_frag_page(frag),
1674 len: skb_frag_size(frag), offset: skb_frag_off(frag));
1675 }
1676
1677 err = virtnet_add_outbuf(sq, num: nr_frags + 1, data: xdpf, type: VIRTNET_XMIT_TYPE_XDP);
1678 if (unlikely(err))
1679 return -ENOSPC; /* Caller handle free/refcnt */
1680
1681 return 0;
1682}
1683
/* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq, 2. deciding on and
 * performing the lock/unlock of txq, 3. making sparse happy. It is difficult
 * for two inline functions to solve all three problems at the same time.
 */
1692#define virtnet_xdp_get_sq(vi) ({ \
1693 int cpu = smp_processor_id(); \
1694 struct netdev_queue *txq; \
1695 typeof(vi) v = (vi); \
1696 unsigned int qp; \
1697 \
1698 if (v->curr_queue_pairs > nr_cpu_ids) { \
1699 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
1700 qp += cpu; \
1701 txq = netdev_get_tx_queue(v->dev, qp); \
1702 __netif_tx_acquire(txq); \
1703 } else { \
1704 qp = cpu % v->curr_queue_pairs; \
1705 txq = netdev_get_tx_queue(v->dev, qp); \
1706 __netif_tx_lock(txq, cpu); \
1707 } \
1708 v->sq + qp; \
1709})
1710
1711#define virtnet_xdp_put_sq(vi, q) { \
1712 struct netdev_queue *txq; \
1713 typeof(vi) v = (vi); \
1714 \
1715 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
1716 if (v->curr_queue_pairs > nr_cpu_ids) \
1717 __netif_tx_release(txq); \
1718 else \
1719 __netif_tx_unlock(txq); \
1720}
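/* Typical pairing, as in virtnet_xdp_xmit() below:
 *
 *	sq = virtnet_xdp_get_sq(vi);
 *	...queue XDP frames on sq...
 *	virtnet_xdp_put_sq(vi, sq);
 *
 * When every CPU has a dedicated XDP queue only the lockless annotation is
 * taken and released; otherwise the real txq lock is used.
 */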
1721
1722static int virtnet_xdp_xmit(struct net_device *dev,
1723 int n, struct xdp_frame **frames, u32 flags)
1724{
1725 struct virtnet_info *vi = netdev_priv(dev);
1726 struct virtnet_sq_free_stats stats = {0};
1727 struct receive_queue *rq = vi->rq;
1728 struct bpf_prog *xdp_prog;
1729 struct send_queue *sq;
1730 int nxmit = 0;
1731 int kicks = 0;
1732 int ret;
1733 int i;
1734
	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates XDP resources have been successfully allocated.
	 */
1738 xdp_prog = rcu_access_pointer(rq->xdp_prog);
1739 if (!xdp_prog)
1740 return -ENXIO;
1741
1742 sq = virtnet_xdp_get_sq(vi);
1743
1744 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
1745 ret = -EINVAL;
1746 goto out;
1747 }
1748
1749 /* Free up any pending old buffers before queueing new ones. */
1750 virtnet_free_old_xmit(sq, txq: netdev_get_tx_queue(dev, index: sq - vi->sq),
1751 in_napi: false, stats: &stats);
1752
1753 for (i = 0; i < n; i++) {
1754 struct xdp_frame *xdpf = frames[i];
1755
1756 if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
1757 break;
1758 nxmit++;
1759 }
1760 ret = nxmit;
1761
1762 if (!is_xdp_raw_buffer_queue(vi, q: sq - vi->sq))
1763 check_sq_full_and_disable(vi, dev, sq);
1764
1765 if (flags & XDP_XMIT_FLUSH) {
1766 if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq))
1767 kicks = 1;
1768 }
1769out:
1770 u64_stats_update_begin(syncp: &sq->stats.syncp);
1771 u64_stats_add(p: &sq->stats.bytes, val: stats.bytes);
1772 u64_stats_add(p: &sq->stats.packets, val: stats.packets);
1773 u64_stats_add(p: &sq->stats.xdp_tx, val: n);
1774 u64_stats_add(p: &sq->stats.xdp_tx_drops, val: n - nxmit);
1775 u64_stats_add(p: &sq->stats.kicks, val: kicks);
1776 u64_stats_update_end(syncp: &sq->stats.syncp);
1777
1778 virtnet_xdp_put_sq(vi, sq);
1779 return ret;
1780}
1781
1782static void put_xdp_frags(struct xdp_buff *xdp)
1783{
1784 struct skb_shared_info *shinfo;
1785 struct page *xdp_page;
1786 int i;
1787
1788 if (xdp_buff_has_frags(xdp)) {
1789 shinfo = xdp_get_shared_info_from_buff(xdp);
1790 for (i = 0; i < shinfo->nr_frags; i++) {
1791 xdp_page = skb_frag_page(frag: &shinfo->frags[i]);
1792 put_page(page: xdp_page);
1793 }
1794 }
1795}
1796
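/* Run the XDP program on @xdp and carry out its verdict. For XDP_TX and
 * XDP_REDIRECT the buffer has already been consumed when this returns, while
 * for XDP_DROP (and errors folded into it) the caller remains responsible for
 * freeing the buffer.
 */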
1797static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
1798 struct net_device *dev,
1799 unsigned int *xdp_xmit,
1800 struct virtnet_rq_stats *stats)
1801{
1802 struct xdp_frame *xdpf;
1803 int err;
1804 u32 act;
1805
1806 act = bpf_prog_run_xdp(prog: xdp_prog, xdp);
1807 u64_stats_inc(p: &stats->xdp_packets);
1808
1809 switch (act) {
1810 case XDP_PASS:
1811 return act;
1812
1813 case XDP_TX:
1814 u64_stats_inc(p: &stats->xdp_tx);
1815 xdpf = xdp_convert_buff_to_frame(xdp);
1816 if (unlikely(!xdpf)) {
1817 netdev_dbg(dev, "convert buff to frame failed for xdp\n");
1818 return XDP_DROP;
1819 }
1820
1821 err = virtnet_xdp_xmit(dev, n: 1, frames: &xdpf, flags: 0);
1822 if (unlikely(!err)) {
1823 xdp_return_frame_rx_napi(xdpf);
1824 } else if (unlikely(err < 0)) {
1825 trace_xdp_exception(dev, xdp: xdp_prog, act);
1826 return XDP_DROP;
1827 }
1828 *xdp_xmit |= VIRTIO_XDP_TX;
1829 return act;
1830
1831 case XDP_REDIRECT:
1832 u64_stats_inc(p: &stats->xdp_redirects);
1833 err = xdp_do_redirect(dev, xdp, prog: xdp_prog);
1834 if (err)
1835 return XDP_DROP;
1836
1837 *xdp_xmit |= VIRTIO_XDP_REDIR;
1838 return act;
1839
1840 default:
1841 bpf_warn_invalid_xdp_action(dev, prog: xdp_prog, act);
1842 fallthrough;
1843 case XDP_ABORTED:
1844 trace_xdp_exception(dev, xdp: xdp_prog, act);
1845 fallthrough;
1846 case XDP_DROP:
1847 return XDP_DROP;
1848 }
1849}
1850
1851static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
1852{
1853 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0;
1854}
1855
1856/* We copy the packet for XDP in the following cases:
1857 *
1858 * 1) Packet is scattered across multiple rx buffers.
1859 * 2) Headroom space is insufficient.
1860 *
1861 * This is inefficient but it's a temporary condition that
1862 * we hit right after XDP is enabled and until queue is refilled
1863 * with large buffers with sufficient headroom - so it should affect
1864 * at most queue size packets.
1865 * Afterwards, the conditions to enable
1866 * XDP should preclude the underlying device from sending packets
1867 * across multiple buffers (num_buf > 1), and we make sure buffers
1868 * have enough headroom.
1869 */
1870static struct page *xdp_linearize_page(struct net_device *dev,
1871 struct receive_queue *rq,
1872 int *num_buf,
1873 struct page *p,
1874 int offset,
1875 int page_off,
1876 unsigned int *len)
1877{
1878 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1879 struct page *page;
1880
1881 if (page_off + *len + tailroom > PAGE_SIZE)
1882 return NULL;
1883
1884 page = alloc_page(GFP_ATOMIC);
1885 if (!page)
1886 return NULL;
1887
1888 memcpy(page_address(page) + page_off, page_address(p) + offset, len: *len);
1889 page_off += *len;
1890
	/* Only mergeable mode can enter this while loop. In small mode,
	 * *num_buf == 1, so the loop body never runs.
	 */
1894 while (--*num_buf) {
1895 unsigned int buflen;
1896 void *buf;
1897 void *ctx;
1898 int off;
1899
1900 buf = virtnet_rq_get_buf(rq, len: &buflen, ctx: &ctx);
1901 if (unlikely(!buf))
1902 goto err_buf;
1903
1904 p = virt_to_head_page(x: buf);
1905 off = buf - page_address(p);
1906
1907 if (check_mergeable_len(dev, mrg_ctx: ctx, len: buflen)) {
1908 put_page(page: p);
1909 goto err_buf;
1910 }
1911
		/* guard against a misconfigured or uncooperative backend that
		 * is sending packets larger than the MTU.
		 */
1915 if ((page_off + buflen + tailroom) > PAGE_SIZE) {
1916 put_page(page: p);
1917 goto err_buf;
1918 }
1919
1920 memcpy(page_address(page) + page_off,
1921 page_address(p) + off, len: buflen);
1922 page_off += buflen;
1923 put_page(page: p);
1924 }
1925
1926 /* Headroom does not contribute to packet length */
1927 *len = page_off - XDP_PACKET_HEADROOM;
1928 return page;
1929err_buf:
1930 __free_pages(page, order: 0);
1931 return NULL;
1932}
1933
1934static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
1935 unsigned int xdp_headroom,
1936 void *buf,
1937 unsigned int len)
1938{
1939 unsigned int header_offset;
1940 unsigned int headroom;
1941 unsigned int buflen;
1942 struct sk_buff *skb;
1943
1944 header_offset = VIRTNET_RX_PAD + xdp_headroom;
1945 headroom = vi->hdr_len + header_offset;
1946 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1947 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1948
1949 skb = virtnet_build_skb(buf, buflen, headroom, len);
1950 if (unlikely(!skb))
1951 return NULL;
1952
1953 buf += header_offset;
1954 memcpy(to: skb_vnet_common_hdr(skb), from: buf, len: vi->hdr_len);
1955
1956 return skb;
1957}
1958
1959static struct sk_buff *receive_small_xdp(struct net_device *dev,
1960 struct virtnet_info *vi,
1961 struct receive_queue *rq,
1962 struct bpf_prog *xdp_prog,
1963 void *buf,
1964 unsigned int xdp_headroom,
1965 unsigned int len,
1966 unsigned int *xdp_xmit,
1967 struct virtnet_rq_stats *stats)
1968{
1969 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
1970 unsigned int headroom = vi->hdr_len + header_offset;
1971 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
1972 struct page *page = virt_to_head_page(x: buf);
1973 struct page *xdp_page;
1974 unsigned int buflen;
1975 struct xdp_buff xdp;
1976 struct sk_buff *skb;
1977 unsigned int metasize = 0;
1978 u32 act;
1979
1980 if (unlikely(hdr->hdr.gso_type))
1981 goto err_xdp;
1982
1983 /* Partially checksummed packets must be dropped. */
1984 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
1985 goto err_xdp;
1986
1987 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1988 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1989
1990 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
1991 int offset = buf - page_address(page) + header_offset;
1992 unsigned int tlen = len + vi->hdr_len;
1993 int num_buf = 1;
1994
1995 xdp_headroom = virtnet_get_headroom(vi);
1996 header_offset = VIRTNET_RX_PAD + xdp_headroom;
1997 headroom = vi->hdr_len + header_offset;
1998 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1999 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		xdp_page = xdp_linearize_page(dev, rq, &num_buf, page,
					      offset, header_offset,
					      &tlen);
2003 if (!xdp_page)
2004 goto err_xdp;
2005
2006 buf = page_address(xdp_page);
2007 put_page(page);
2008 page = xdp_page;
2009 }
2010
	xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
	xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
			 xdp_headroom, len, true);

	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
2016
2017 switch (act) {
2018 case XDP_PASS:
2019 /* Recalculate length in case bpf program changed it */
2020 len = xdp.data_end - xdp.data;
2021 metasize = xdp.data - xdp.data_meta;
2022 break;
2023
2024 case XDP_TX:
2025 case XDP_REDIRECT:
2026 goto xdp_xmit;
2027
2028 default:
2029 goto err_xdp;
2030 }
2031
	skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
	if (unlikely(!skb))
		goto err;

	if (metasize)
		skb_metadata_set(skb, metasize);

	return skb;

err_xdp:
	u64_stats_inc(&stats->xdp_drops);
err:
	u64_stats_inc(&stats->drops);
2045 put_page(page);
2046xdp_xmit:
2047 return NULL;
2048}
2049
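/* Small-buffer receive path: each buffer holds the virtio-net header,
 * optional XDP headroom and at most GOOD_PACKET_LEN bytes of data, so a
 * packet never spans more than one buffer here.
 */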
2050static struct sk_buff *receive_small(struct net_device *dev,
2051 struct virtnet_info *vi,
2052 struct receive_queue *rq,
2053 void *buf, void *ctx,
2054 unsigned int len,
2055 unsigned int *xdp_xmit,
2056 struct virtnet_rq_stats *stats)
2057{
2058 unsigned int xdp_headroom = (unsigned long)ctx;
	struct page *page = virt_to_head_page(buf);
2060 struct sk_buff *skb;
2061
2062 /* We passed the address of virtnet header to virtio-core,
2063 * so truncate the padding.
2064 */
2065 buf -= VIRTNET_RX_PAD + xdp_headroom;
2066
2067 len -= vi->hdr_len;
	u64_stats_add(&stats->bytes, len);
2069
2070 if (unlikely(len > GOOD_PACKET_LEN)) {
2071 pr_debug("%s: rx error: len %u exceeds max size %d\n",
2072 dev->name, len, GOOD_PACKET_LEN);
2073 DEV_STATS_INC(dev, rx_length_errors);
2074 goto err;
2075 }
2076
2077 if (unlikely(vi->xdp_enabled)) {
2078 struct bpf_prog *xdp_prog;
2079
2080 rcu_read_lock();
2081 xdp_prog = rcu_dereference(rq->xdp_prog);
2082 if (xdp_prog) {
2083 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
2084 xdp_headroom, len, xdp_xmit,
2085 stats);
2086 rcu_read_unlock();
2087 return skb;
2088 }
2089 rcu_read_unlock();
2090 }
2091
2092 skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
2093 if (likely(skb))
2094 return skb;
2095
2096err:
	u64_stats_inc(&stats->drops);
2098 put_page(page);
2099 return NULL;
2100}
2101
2102static struct sk_buff *receive_big(struct net_device *dev,
2103 struct virtnet_info *vi,
2104 struct receive_queue *rq,
2105 void *buf,
2106 unsigned int len,
2107 struct virtnet_rq_stats *stats)
2108{
2109 struct page *page = buf;
	struct sk_buff *skb =
		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);

	u64_stats_add(&stats->bytes, len - vi->hdr_len);
2114 if (unlikely(!skb))
2115 goto err;
2116
2117 return skb;
2118
2119err:
	u64_stats_inc(&stats->drops);
2121 give_pages(rq, page);
2122 return NULL;
2123}
2124
2125static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
2126 struct net_device *dev,
2127 struct virtnet_rq_stats *stats)
2128{
2129 struct page *page;
2130 void *buf;
2131 int len;
2132
2133 while (num_buf-- > 1) {
		buf = virtnet_rq_get_buf(rq, &len, NULL);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);
			break;
		}
		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
2143 put_page(page);
2144 }
2145}
2146
/* Why not use xdp_build_skb_from_frame()?
 * The XDP core assumes that xdp frags are PAGE_SIZE in length, while in
 * virtio-net there are two points that do not match its requirements:
 * 1. The size of the prefilled buffer is not fixed before xdp is set.
 * 2. xdp_build_skb_from_frame() does extra checks that we don't need,
 *    like eth_type_trans() (which virtio-net does in receive_buf()).
 */
2154static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
2155 struct virtnet_info *vi,
2156 struct xdp_buff *xdp,
2157 unsigned int xdp_frags_truesz)
2158{
2159 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
2160 unsigned int headroom, data_len;
2161 struct sk_buff *skb;
2162 int metasize;
2163 u8 nr_frags;
2164
2165 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
2166 pr_debug("Error building skb as missing reserved tailroom for xdp");
2167 return NULL;
2168 }
2169
2170 if (unlikely(xdp_buff_has_frags(xdp)))
2171 nr_frags = sinfo->nr_frags;
2172
	skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
	if (unlikely(!skb))
		return NULL;

	headroom = xdp->data - xdp->data_hard_start;
	data_len = xdp->data_end - xdp->data;
	skb_reserve(skb, headroom);
	__skb_put(skb, data_len);

	metasize = xdp->data - xdp->data_meta;
	metasize = metasize > 0 ? metasize : 0;
	if (metasize)
		skb_metadata_set(skb, metasize);

	if (unlikely(xdp_buff_has_frags(xdp)))
		xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
					  xdp_frags_truesz,
					  xdp_buff_get_skb_flags(xdp));
2191
2192 return skb;
2193}
2194
2195/* TODO: build xdp in big mode */
2196static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
2197 struct virtnet_info *vi,
2198 struct receive_queue *rq,
2199 struct xdp_buff *xdp,
2200 void *buf,
2201 unsigned int len,
2202 unsigned int frame_sz,
2203 int *num_buf,
2204 unsigned int *xdp_frags_truesize,
2205 struct virtnet_rq_stats *stats)
2206{
2207 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
2208 struct skb_shared_info *shinfo;
2209 unsigned int xdp_frags_truesz = 0;
2210 unsigned int truesize;
2211 struct page *page;
2212 skb_frag_t *frag;
2213 int offset;
2214 void *ctx;
2215
	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
	xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM,
			 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);
2219
2220 if (!*num_buf)
2221 return 0;
2222
2223 if (*num_buf > 1) {
		/* If we want to build a multi-buffer xdp_buff, we need
		 * to set the XDP_FLAGS_HAS_FRAGS bit in the xdp_buff
		 * flags.
		 */
2228 if (!xdp_buff_has_frags(xdp))
2229 xdp_buff_set_frags_flag(xdp);
2230
2231 shinfo = xdp_get_shared_info_from_buff(xdp);
2232 shinfo->nr_frags = 0;
2233 shinfo->xdp_frags_size = 0;
2234 }
2235
2236 if (*num_buf > MAX_SKB_FRAGS + 1)
2237 return -EINVAL;
2238
2239 while (--*num_buf > 0) {
		buf = virtnet_rq_get_buf(rq, &len, &ctx);
2241 if (unlikely(!buf)) {
2242 pr_debug("%s: rx error: %d buffers out of %d missing\n",
2243 dev->name, *num_buf,
2244 virtio16_to_cpu(vi->vdev, hdr->num_buffers));
2245 DEV_STATS_INC(dev, rx_length_errors);
2246 goto err;
2247 }
2248
		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
		offset = buf - page_address(page);

		if (check_mergeable_len(dev, ctx, len)) {
			put_page(page);
			goto err;
		}

		truesize = mergeable_ctx_to_truesize(ctx);
		xdp_frags_truesz += truesize;

		frag = &shinfo->frags[shinfo->nr_frags++];
		skb_frag_fill_page_desc(frag, page, offset, len);
2263 if (page_is_pfmemalloc(page))
2264 xdp_buff_set_frag_pfmemalloc(xdp);
2265
2266 shinfo->xdp_frags_size += len;
2267 }
2268
2269 *xdp_frags_truesize = xdp_frags_truesz;
2270 return 0;
2271
2272err:
2273 put_xdp_frags(xdp);
2274 return -EINVAL;
2275}
2276
2277static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
2278 struct receive_queue *rq,
2279 struct bpf_prog *xdp_prog,
2280 void *ctx,
2281 unsigned int *frame_sz,
2282 int *num_buf,
2283 struct page **page,
2284 int offset,
2285 unsigned int *len,
2286 struct virtio_net_hdr_mrg_rxbuf *hdr)
2287{
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
2290 struct page *xdp_page;
2291 unsigned int xdp_room;
2292
2293 /* Transient failure which in theory could occur if
2294 * in-flight packets from before XDP was enabled reach
2295 * the receive path after XDP is loaded.
2296 */
2297 if (unlikely(hdr->hdr.gso_type))
2298 return NULL;
2299
2300 /* Partially checksummed packets must be dropped. */
2301 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
2302 return NULL;
2303
	/* The XDP core assumes the frag size is PAGE_SIZE, but buffers
	 * allocated with headroom may add a hole to the truesize, which
	 * would make their length exceed PAGE_SIZE. So the hole
	 * mechanism is disabled for XDP. See add_recvbuf_mergeable().
	 */
2309 *frame_sz = truesize;
2310
2311 if (likely(headroom >= virtnet_get_headroom(vi) &&
2312 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
2313 return page_address(*page) + offset;
2314 }
2315
	/* This happens when the headroom is not enough because the
	 * buffer was prefilled before XDP was set.
	 * This should only happen for the first several packets.
	 * In fact, vq reset could be used here to help us clean up
	 * the prefilled buffers, but many existing devices do not
	 * support it, and we don't want to bother users who are
	 * using XDP normally.
	 */
2324 if (!xdp_prog->aux->xdp_has_frags) {
2325 /* linearize data for XDP */
		xdp_page = xdp_linearize_page(vi->dev, rq, num_buf,
					      *page, offset,
					      XDP_PACKET_HEADROOM,
					      len);
2330 if (!xdp_page)
2331 return NULL;
2332 } else {
2333 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
2334 sizeof(struct skb_shared_info));
2335 if (*len + xdp_room > PAGE_SIZE)
2336 return NULL;
2337
2338 xdp_page = alloc_page(GFP_ATOMIC);
2339 if (!xdp_page)
2340 return NULL;
2341
		memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM,
		       page_address(*page) + offset, *len);
2344 }
2345
2346 *frame_sz = PAGE_SIZE;
2347
	put_page(*page);
2349
2350 *page = xdp_page;
2351
2352 return page_address(*page) + XDP_PACKET_HEADROOM;
2353}
2354
2355static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
2356 struct virtnet_info *vi,
2357 struct receive_queue *rq,
2358 struct bpf_prog *xdp_prog,
2359 void *buf,
2360 void *ctx,
2361 unsigned int len,
2362 unsigned int *xdp_xmit,
2363 struct virtnet_rq_stats *stats)
2364{
2365 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
2368 int offset = buf - page_address(page);
2369 unsigned int xdp_frags_truesz = 0;
2370 struct sk_buff *head_skb;
2371 unsigned int frame_sz;
2372 struct xdp_buff xdp;
2373 void *data;
2374 u32 act;
2375 int err;
2376
	data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page,
				     offset, &len, hdr);
2379 if (unlikely(!data))
2380 goto err_xdp;
2381
	err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
					 &num_buf, &xdp_frags_truesz, stats);
2384 if (unlikely(err))
2385 goto err_xdp;
2386
	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
2388
2389 switch (act) {
2390 case XDP_PASS:
		head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
2392 if (unlikely(!head_skb))
2393 break;
2394 return head_skb;
2395
2396 case XDP_TX:
2397 case XDP_REDIRECT:
2398 return NULL;
2399
2400 default:
2401 break;
2402 }
2403
	put_xdp_frags(&xdp);
2405
2406err_xdp:
2407 put_page(page);
2408 mergeable_buf_free(rq, num_buf, dev, stats);
2409
	u64_stats_inc(&stats->xdp_drops);
	u64_stats_inc(&stats->drops);
2412 return NULL;
2413}
2414
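/* Attach one mergeable receive buffer to the skb being assembled as a page
 * frag. When the current skb has no frag slots left, a zero-length skb is
 * linked via frag_list and filling continues there. Returns the skb that
 * now accepts frags, or NULL if allocation fails.
 */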
2415static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
2416 struct sk_buff *curr_skb,
2417 struct page *page, void *buf,
2418 int len, int truesize)
2419{
2420 int num_skb_frags;
2421 int offset;
2422
2423 num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
2424 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
2426
2427 if (unlikely(!nskb))
2428 return NULL;
2429
2430 if (curr_skb == head_skb)
2431 skb_shinfo(curr_skb)->frag_list = nskb;
2432 else
2433 curr_skb->next = nskb;
2434 curr_skb = nskb;
2435 head_skb->truesize += nskb->truesize;
2436 num_skb_frags = 0;
2437 }
2438
2439 if (curr_skb != head_skb) {
2440 head_skb->data_len += len;
2441 head_skb->len += len;
2442 head_skb->truesize += truesize;
2443 }
2444
2445 offset = buf - page_address(page);
	if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
		put_page(page);
		skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
				     len, truesize);
	} else {
		skb_add_rx_frag(curr_skb, num_skb_frags, page,
				offset, len, truesize);
2453 }
2454
2455 return curr_skb;
2456}
2457
2458static struct sk_buff *receive_mergeable(struct net_device *dev,
2459 struct virtnet_info *vi,
2460 struct receive_queue *rq,
2461 void *buf,
2462 void *ctx,
2463 unsigned int len,
2464 unsigned int *xdp_xmit,
2465 struct virtnet_rq_stats *stats)
2466{
2467 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
	int offset = buf - page_address(page);
	struct sk_buff *head_skb, *curr_skb;
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
2474
2475 head_skb = NULL;
	u64_stats_add(&stats->bytes, len - vi->hdr_len);
2477
	if (check_mergeable_len(dev, ctx, len))
2479 goto err_skb;
2480
2481 if (unlikely(vi->xdp_enabled)) {
2482 struct bpf_prog *xdp_prog;
2483
2484 rcu_read_lock();
2485 xdp_prog = rcu_dereference(rq->xdp_prog);
2486 if (xdp_prog) {
2487 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
2488 len, xdp_xmit, stats);
2489 rcu_read_unlock();
2490 return head_skb;
2491 }
2492 rcu_read_unlock();
2493 }
2494
2495 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
2496 curr_skb = head_skb;
2497
2498 if (unlikely(!curr_skb))
2499 goto err_skb;
2500 while (--num_buf) {
		buf = virtnet_rq_get_buf(rq, &len, &ctx);
2502 if (unlikely(!buf)) {
2503 pr_debug("%s: rx error: %d buffers out of %d missing\n",
2504 dev->name, num_buf,
2505 virtio16_to_cpu(vi->vdev,
2506 hdr->num_buffers));
2507 DEV_STATS_INC(dev, rx_length_errors);
2508 goto err_buf;
2509 }
2510
		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);

		if (check_mergeable_len(dev, ctx, len))
			goto err_skb;

		truesize = mergeable_ctx_to_truesize(ctx);
2518 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
2519 buf, len, truesize);
2520 if (!curr_skb)
2521 goto err_skb;
2522 }
2523
	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
2525 return head_skb;
2526
2527err_skb:
2528 put_page(page);
2529 mergeable_buf_free(rq, num_buf, dev, stats);
2530
2531err_buf:
	u64_stats_inc(&stats->drops);
2533 dev_kfree_skb(head_skb);
2534 return NULL;
2535}
2536
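/* Translate the device-reported hash_report type into the kernel's
 * PKT_HASH_TYPE_* value and record the reported hash on the skb.
 */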
2537static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash,
2538 struct sk_buff *skb)
2539{
2540 enum pkt_hash_types rss_hash_type;
2541
2542 if (!hdr_hash || !skb)
2543 return;
2544
2545 switch (__le16_to_cpu(hdr_hash->hash_report)) {
2546 case VIRTIO_NET_HASH_REPORT_TCPv4:
2547 case VIRTIO_NET_HASH_REPORT_UDPv4:
2548 case VIRTIO_NET_HASH_REPORT_TCPv6:
2549 case VIRTIO_NET_HASH_REPORT_UDPv6:
2550 case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
2551 case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
2552 rss_hash_type = PKT_HASH_TYPE_L4;
2553 break;
2554 case VIRTIO_NET_HASH_REPORT_IPv4:
2555 case VIRTIO_NET_HASH_REPORT_IPv6:
2556 case VIRTIO_NET_HASH_REPORT_IPv6_EX:
2557 rss_hash_type = PKT_HASH_TYPE_L3;
2558 break;
2559 case VIRTIO_NET_HASH_REPORT_NONE:
2560 default:
2561 rss_hash_type = PKT_HASH_TYPE_NONE;
2562 }
	skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type);
2564}
2565
2566static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
2567 struct sk_buff *skb, u8 flags)
2568{
2569 struct virtio_net_common_hdr *hdr;
2570 struct net_device *dev = vi->dev;
2571
2572 hdr = skb_vnet_common_hdr(skb);
2573 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
		virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);
2575
2576 hdr->hdr.flags = flags;
	if (virtio_net_handle_csum_offload(skb, &hdr->hdr, vi->rx_tnl_csum)) {
2578 net_warn_ratelimited("%s: bad csum: flags: %x, gso_type: %x rx_tnl_csum %d\n",
2579 dev->name, hdr->hdr.flags,
2580 hdr->hdr.gso_type, vi->rx_tnl_csum);
2581 goto frame_err;
2582 }
2583
	if (virtio_net_hdr_tnl_to_skb(skb, &hdr->tnl_hdr, vi->rx_tnl,
				      vi->rx_tnl_csum,
				      virtio_is_little_endian(vi->vdev))) {
2587 net_warn_ratelimited("%s: bad gso: type: %x, size: %u, flags %x tunnel %d tnl csum %d\n",
2588 dev->name, hdr->hdr.gso_type,
2589 hdr->hdr.gso_size, hdr->hdr.flags,
2590 vi->rx_tnl, vi->rx_tnl_csum);
2591 goto frame_err;
2592 }
2593
	skb_record_rx_queue(skb, vq2rxq(rq->vq));
2595 skb->protocol = eth_type_trans(skb, dev);
2596 pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
2597 ntohs(skb->protocol), skb->len, skb->pkt_type);
2598
	napi_gro_receive(&rq->napi, skb);
2600 return;
2601
2602frame_err:
2603 DEV_STATS_INC(dev, rx_frame_errors);
2604 dev_kfree_skb(skb);
2605}
2606
2607static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
2608 void *buf, unsigned int len, void **ctx,
2609 unsigned int *xdp_xmit,
2610 struct virtnet_rq_stats *stats)
2611{
2612 struct net_device *dev = vi->dev;
2613 struct sk_buff *skb;
2614 u8 flags;
2615
2616 if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
2617 pr_debug("%s: short packet %i\n", dev->name, len);
2618 DEV_STATS_INC(dev, rx_length_errors);
2619 virtnet_rq_free_buf(vi, rq, buf);
2620 return;
2621 }
2622
2623 /* 1. Save the flags early, as the XDP program might overwrite them.
2624 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID
2625 * stay valid after XDP processing.
2626 * 2. XDP doesn't work with partially checksummed packets (refer to
2627 * virtnet_xdp_set()), so packets marked as
2628 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing.
2629 */
2630 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags;
2631
2632 if (vi->mergeable_rx_bufs)
2633 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
2634 stats);
2635 else if (vi->big_packets)
2636 skb = receive_big(dev, vi, rq, buf, len, stats);
2637 else
2638 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
2639
2640 if (unlikely(!skb))
2641 return;
2642
2643 virtnet_receive_done(vi, rq, skb, flags);
2644}
2645
2646/* Unlike mergeable buffers, all buffers are allocated to the
2647 * same size, except for the headroom. For this reason we do
2648 * not need to use mergeable_len_to_ctx here - it is enough
2649 * to store the headroom as the context ignoring the truesize.
2650 */
2651static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
2652 gfp_t gfp)
2653{
2654 char *buf;
2655 unsigned int xdp_headroom = virtnet_get_headroom(vi);
2656 void *ctx = (void *)(unsigned long)xdp_headroom;
2657 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
2658 int err;
2659
2660 len = SKB_DATA_ALIGN(len) +
2661 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2662
2663 if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp)))
2664 return -ENOMEM;
2665
	buf = virtnet_rq_alloc(rq, len, gfp);
2667 if (unlikely(!buf))
2668 return -ENOMEM;
2669
2670 buf += VIRTNET_RX_PAD + xdp_headroom;
2671
	virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN);

	err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
2678 }
2679
2680 return err;
2681}
2682
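/* Big-packets mode: post a chain of whole pages. rq->sg[0] carries the
 * virtio-net header, rq->sg[1] the remainder of that first page, and every
 * following sg entry one extra page; the pages are linked through
 * page->private so the chain can be given back as a unit.
 */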
2683static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
2684 gfp_t gfp)
2685{
2686 struct page *first, *list = NULL;
2687 char *p;
2688 int i, err, offset;
2689
2690 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2);
2691
2692 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */
2693 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) {
		first = get_a_page(rq, gfp);
		if (!first) {
			if (list)
				give_pages(rq, list);
			return -ENOMEM;
		}
		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
2701
2702 /* chain new page in list head to match sg */
2703 first->private = (unsigned long)list;
2704 list = first;
2705 }
2706
	first = get_a_page(rq, gfp);
	if (!first) {
		give_pages(rq, list);
2710 return -ENOMEM;
2711 }
2712 p = page_address(first);
2713
	/* rq->sg[0], rq->sg[1] share the same page */
	/* a separate rq->sg[0] for header - required in case !any_header_sg */
	sg_set_buf(&rq->sg[0], p, vi->hdr_len);
2717
2718 /* rq->sg[1] for data packet, from offset */
2719 offset = sizeof(struct padded_vnet_hdr);
	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
2721
2722 /* chain first in list head */
2723 first->private = (unsigned long)list;
	err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2,
				  first, gfp);
	if (err < 0)
		give_pages(rq, first);
2728
2729 return err;
2730}
2731
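/* Pick the buffer length for mergeable receive buffers: with XDP headroom
 * reserved (room != 0) use a whole page minus that room, otherwise size the
 * buffer from the EWMA of recent packet lengths, clamped to
 * [min_buf_len, PAGE_SIZE - hdr_len] and rounded up to the cache line size.
 */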
2732static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
2733 struct ewma_pkt_len *avg_pkt_len,
2734 unsigned int room)
2735{
2736 struct virtnet_info *vi = rq->vq->vdev->priv;
2737 const size_t hdr_len = vi->hdr_len;
2738 unsigned int len;
2739
2740 if (room)
2741 return PAGE_SIZE - room;
2742
2743 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
2744 rq->min_buf_len, PAGE_SIZE - hdr_len);
2745
2746 return ALIGN(len, L1_CACHE_BYTES);
2747}
2748
2749static int add_recvbuf_mergeable(struct virtnet_info *vi,
2750 struct receive_queue *rq, gfp_t gfp)
2751{
2752 struct page_frag *alloc_frag = &rq->alloc_frag;
2753 unsigned int headroom = virtnet_get_headroom(vi);
2754 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
2755 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
2756 unsigned int len, hole;
2757 void *ctx;
2758 char *buf;
2759 int err;
2760
	/* Extra tailroom is needed to satisfy XDP's assumption. This
	 * means rx frag coalescing won't work, but since we have
	 * disabled GSO for XDP it should not be a big issue.
	 */
	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
2766
2767 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
2768 return -ENOMEM;
2769
2770 if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size)
2771 len -= sizeof(struct virtnet_rq_dma);
2772
	buf = virtnet_rq_alloc(rq, len + room, gfp);
2774 if (unlikely(!buf))
2775 return -ENOMEM;
2776
2777 buf += headroom; /* advance address leaving hole at front of pkt */
2778 hole = alloc_frag->size - alloc_frag->offset;
2779 if (hole < len + room) {
2780 /* To avoid internal fragmentation, if there is very likely not
2781 * enough space for another buffer, add the remaining space to
2782 * the current buffer.
2783 * XDP core assumes that frame_size of xdp_buff and the length
2784 * of the frag are PAGE_SIZE, so we disable the hole mechanism.
2785 */
2786 if (!headroom)
2787 len += hole;
2788 alloc_frag->offset += hole;
2789 }
2790
	virtnet_rq_init_one_sg(rq, buf, len);

	ctx = mergeable_len_to_ctx(len + room, headroom);
	err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
2798 }
2799
2800 return err;
2801}
2802
2803/*
2804 * Returns false if we couldn't fill entirely (OOM).
2805 *
2806 * Normally run in the receive path, but can also be run from ndo_open
2807 * before we're receiving packets, or from refill_work which is
2808 * careful to disable receiving (using napi_disable).
2809 */
2810static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
2811 gfp_t gfp)
2812{
2813 int err;
2814
2815 if (rq->xsk_pool) {
		err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp);
2817 goto kick;
2818 }
2819
2820 do {
2821 if (vi->mergeable_rx_bufs)
2822 err = add_recvbuf_mergeable(vi, rq, gfp);
2823 else if (vi->big_packets)
2824 err = add_recvbuf_big(vi, rq, gfp);
2825 else
2826 err = add_recvbuf_small(vi, rq, gfp);
2827
2828 if (err)
2829 break;
2830 } while (rq->vq->num_free);
2831
2832kick:
	if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
		unsigned long flags;

		flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
		u64_stats_inc(&rq->stats.kicks);
		u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
2839 }
2840
2841 return err != -ENOMEM;
2842}
2843
2844static void skb_recv_done(struct virtqueue *rvq)
2845{
2846 struct virtnet_info *vi = rvq->vdev->priv;
	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];

	rq->calls++;
	virtqueue_napi_schedule(&rq->napi, rvq);
2851}
2852
2853static void virtnet_napi_do_enable(struct virtqueue *vq,
2854 struct napi_struct *napi)
2855{
	napi_enable(napi);

	/* If all buffers were filled by the other side before we enabled
	 * NAPI, we won't get another interrupt, so process any outstanding
	 * packets now. Calling local_bh_enable afterwards triggers softIRQ
	 * processing.
	 */
2862 local_bh_disable();
2863 virtqueue_napi_schedule(napi, vq);
2864 local_bh_enable();
2865}
2866
2867static void virtnet_napi_enable(struct receive_queue *rq)
2868{
2869 struct virtnet_info *vi = rq->vq->vdev->priv;
	int qidx = vq2rxq(rq->vq);

	virtnet_napi_do_enable(rq->vq, &rq->napi);
	netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
2874}
2875
2876static void virtnet_napi_tx_enable(struct send_queue *sq)
2877{
2878 struct virtnet_info *vi = sq->vq->vdev->priv;
2879 struct napi_struct *napi = &sq->napi;
	int qidx = vq2txq(sq->vq);
2881
2882 if (!napi->weight)
2883 return;
2884
2885 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
2886 * enable the feature if this is likely affine with the transmit path.
2887 */
2888 if (!vi->affinity_hint_set) {
2889 napi->weight = 0;
2890 return;
2891 }
2892
	virtnet_napi_do_enable(sq->vq, napi);
	netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
2895}
2896
2897static void virtnet_napi_tx_disable(struct send_queue *sq)
2898{
2899 struct virtnet_info *vi = sq->vq->vdev->priv;
2900 struct napi_struct *napi = &sq->napi;
	int qidx = vq2txq(sq->vq);

	if (napi->weight) {
		netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
		napi_disable(napi);
2906 }
2907}
2908
2909static void virtnet_napi_disable(struct receive_queue *rq)
2910{
2911 struct virtnet_info *vi = rq->vq->vdev->priv;
2912 struct napi_struct *napi = &rq->napi;
	int qidx = vq2rxq(rq->vq);

	netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
	napi_disable(napi);
2917}
2918
2919static void refill_work(struct work_struct *work)
2920{
2921 struct virtnet_info *vi =
2922 container_of(work, struct virtnet_info, refill.work);
2923 bool still_empty;
2924 int i;
2925
2926 for (i = 0; i < vi->curr_queue_pairs; i++) {
2927 struct receive_queue *rq = &vi->rq[i];
2928
2929 /*
2930 * When queue API support is added in the future and the call
2931 * below becomes napi_disable_locked, this driver will need to
2932 * be refactored.
2933 *
2934 * One possible solution would be to:
2935 * - cancel refill_work with cancel_delayed_work (note:
2936 * non-sync)
2937 * - cancel refill_work with cancel_delayed_work_sync in
2938 * virtnet_remove after the netdev is unregistered
2939 * - wrap all of the work in a lock (perhaps the netdev
2940 * instance lock)
2941 * - check netif_running() and return early to avoid a race
2942 */
		napi_disable(&rq->napi);
		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
		virtnet_napi_do_enable(rq->vq, &rq->napi);
2946
		/* In theory, this can happen: if we don't get any buffers
		 * in, we will *never* try to fill again.
		 */
		if (still_empty)
			schedule_delayed_work(&vi->refill, HZ/2);
2952 }
2953}
2954
2955static int virtnet_receive_xsk_bufs(struct virtnet_info *vi,
2956 struct receive_queue *rq,
2957 int budget,
2958 unsigned int *xdp_xmit,
2959 struct virtnet_rq_stats *stats)
2960{
2961 unsigned int len;
2962 int packets = 0;
2963 void *buf;
2964
2965 while (packets < budget) {
		buf = virtqueue_get_buf(rq->vq, &len);
2967 if (!buf)
2968 break;
2969
2970 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats);
2971 packets++;
2972 }
2973
2974 return packets;
2975}
2976
2977static int virtnet_receive_packets(struct virtnet_info *vi,
2978 struct receive_queue *rq,
2979 int budget,
2980 unsigned int *xdp_xmit,
2981 struct virtnet_rq_stats *stats)
2982{
2983 unsigned int len;
2984 int packets = 0;
2985 void *buf;
2986
2987 if (!vi->big_packets || vi->mergeable_rx_bufs) {
2988 void *ctx;
2989 while (packets < budget &&
		       (buf = virtnet_rq_get_buf(rq, &len, &ctx))) {
			receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats);
2992 packets++;
2993 }
2994 } else {
2995 while (packets < budget &&
		       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
2997 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats);
2998 packets++;
2999 }
3000 }
3001
3002 return packets;
3003}
3004
3005static int virtnet_receive(struct receive_queue *rq, int budget,
3006 unsigned int *xdp_xmit)
3007{
3008 struct virtnet_info *vi = rq->vq->vdev->priv;
3009 struct virtnet_rq_stats stats = {};
3010 int i, packets;
3011
3012 if (rq->xsk_pool)
		packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats);
	else
		packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);
3016
3017 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
3018 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
			spin_lock(&vi->refill_lock);
			if (vi->refill_enabled)
				schedule_delayed_work(&vi->refill, 0);
			spin_unlock(&vi->refill_lock);
3023 }
3024 }
3025
3026 u64_stats_set(p: &stats.packets, val: packets);
3027 u64_stats_update_begin(syncp: &rq->stats.syncp);
3028 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) {
3029 size_t offset = virtnet_rq_stats_desc[i].offset;
3030 u64_stats_t *item, *src;
3031
3032 item = (u64_stats_t *)((u8 *)&rq->stats + offset);
3033 src = (u64_stats_t *)((u8 *)&stats + offset);
3034 u64_stats_add(p: item, val: u64_stats_read(p: src));
3035 }
3036
3037 u64_stats_add(p: &rq->stats.packets, val: u64_stats_read(p: &stats.packets));
3038 u64_stats_add(p: &rq->stats.bytes, val: u64_stats_read(p: &stats.bytes));
3039
3040 u64_stats_update_end(syncp: &rq->stats.syncp);
3041
3042 return packets;
3043}
3044
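/* While polling rx, opportunistically reclaim completed tx buffers of the
 * paired send queue and wake its tx queue if enough descriptors were freed.
 * Only used when tx NAPI is enabled, and never for XDP tx queues.
 */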
3045static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
3046{
3047 struct virtnet_info *vi = rq->vq->vdev->priv;
3048 unsigned int index = vq2rxq(vq: rq->vq);
3049 struct send_queue *sq = &vi->sq[index];
3050 struct netdev_queue *txq = netdev_get_tx_queue(dev: vi->dev, index);
3051
3052 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, q: index))
3053 return;
3054
3055 if (__netif_tx_trylock(txq)) {
3056 if (sq->reset) {
3057 __netif_tx_unlock(txq);
3058 return;
3059 }
3060
3061 do {
3062 virtqueue_disable_cb(vq: sq->vq);
3063 free_old_xmit(sq, txq, in_napi: !!budget);
3064 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
3065
3066 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 &&
3067 netif_tx_queue_stopped(dev_queue: txq)) {
3068 u64_stats_update_begin(syncp: &sq->stats.syncp);
3069 u64_stats_inc(p: &sq->stats.wake);
3070 u64_stats_update_end(syncp: &sq->stats.syncp);
3071 netif_tx_wake_queue(dev_queue: txq);
3072 }
3073
3074 __netif_tx_unlock(txq);
3075 }
3076}
3077
3078static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq)
3079{
3080 struct dim_sample cur_sample = {};
3081
3082 if (!rq->packets_in_napi)
3083 return;
3084
	/* No protection is needed when fetching stats, since the fetcher and
	 * the updater of the stats run in the same context.
	 */
	dim_update_sample(rq->calls,
			  u64_stats_read(&rq->stats.packets),
			  u64_stats_read(&rq->stats.bytes),
			  &cur_sample);

	net_dim(&rq->dim, &cur_sample);
	rq->packets_in_napi = 0;
3095}
3096
3097static int virtnet_poll(struct napi_struct *napi, int budget)
3098{
3099 struct receive_queue *rq =
3100 container_of(napi, struct receive_queue, napi);
3101 struct virtnet_info *vi = rq->vq->vdev->priv;
3102 struct send_queue *sq;
3103 unsigned int received;
3104 unsigned int xdp_xmit = 0;
3105 bool napi_complete;
3106
3107 virtnet_poll_cleantx(rq, budget);
3108
3109 received = virtnet_receive(rq, budget, xdp_xmit: &xdp_xmit);
3110 rq->packets_in_napi += received;
3111
3112 if (xdp_xmit & VIRTIO_XDP_REDIR)
3113 xdp_do_flush();
3114
3115 /* Out of packets? */
3116 if (received < budget) {
3117 napi_complete = virtqueue_napi_complete(napi, vq: rq->vq, processed: received);
3118 /* Intentionally not taking dim_lock here. This may result in a
3119 * spurious net_dim call. But if that happens virtnet_rx_dim_work
3120 * will not act on the scheduled work.
3121 */
3122 if (napi_complete && rq->dim_enabled)
3123 virtnet_rx_dim_update(vi, rq);
3124 }
3125
3126 if (xdp_xmit & VIRTIO_XDP_TX) {
3127 sq = virtnet_xdp_get_sq(vi);
3128 if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq)) {
3129 u64_stats_update_begin(syncp: &sq->stats.syncp);
3130 u64_stats_inc(p: &sq->stats.kicks);
3131 u64_stats_update_end(syncp: &sq->stats.syncp);
3132 }
3133 virtnet_xdp_put_sq(vi, sq);
3134 }
3135
3136 return received;
3137}
3138
3139static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
3140{
	virtnet_napi_tx_disable(&vi->sq[qp_index]);
	virtnet_napi_disable(&vi->rq[qp_index]);
	xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
3144}
3145
3146static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
3147{
3148 struct net_device *dev = vi->dev;
3149 int err;
3150
3151 err = xdp_rxq_info_reg(xdp_rxq: &vi->rq[qp_index].xdp_rxq, dev, queue_index: qp_index,
3152 napi_id: vi->rq[qp_index].napi.napi_id);
3153 if (err < 0)
3154 return err;
3155
3156 err = xdp_rxq_info_reg_mem_model(xdp_rxq: &vi->rq[qp_index].xdp_rxq,
3157 type: MEM_TYPE_PAGE_SHARED, NULL);
3158 if (err < 0)
3159 goto err_xdp_reg_mem_model;
3160
3161 virtnet_napi_enable(rq: &vi->rq[qp_index]);
3162 virtnet_napi_tx_enable(sq: &vi->sq[qp_index]);
3163
3164 return 0;
3165
3166err_xdp_reg_mem_model:
3167 xdp_rxq_info_unreg(xdp_rxq: &vi->rq[qp_index].xdp_rxq);
3168 return err;
3169}
3170
3171static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim)
3172{
3173 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
3174 return;
3175 net_dim_work_cancel(dim);
3176}
3177
3178static void virtnet_update_settings(struct virtnet_info *vi)
3179{
3180 u32 speed;
3181 u8 duplex;
3182
3183 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
3184 return;
3185
3186 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
3187
3188 if (ethtool_validate_speed(speed))
3189 vi->speed = speed;
3190
3191 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
3192
3193 if (ethtool_validate_duplex(duplex))
3194 vi->duplex = duplex;
3195}
3196
3197static int virtnet_open(struct net_device *dev)
3198{
3199 struct virtnet_info *vi = netdev_priv(dev);
3200 int i, err;
3201
3202 enable_delayed_refill(vi);
3203
3204 for (i = 0; i < vi->max_queue_pairs; i++) {
3205 if (i < vi->curr_queue_pairs)
			/* Make sure we have some buffers: if OOM, use the refill workqueue. */
			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
				schedule_delayed_work(&vi->refill, 0);

		err = virtnet_enable_queue_pair(vi, i);
3211 if (err < 0)
3212 goto err_enable_qp;
3213 }
3214
3215 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_STATUS)) {
3216 if (vi->status & VIRTIO_NET_S_LINK_UP)
3217 netif_carrier_on(dev: vi->dev);
3218 virtio_config_driver_enable(dev: vi->vdev);
3219 } else {
3220 vi->status = VIRTIO_NET_S_LINK_UP;
3221 netif_carrier_on(dev);
3222 }
3223
3224 return 0;
3225
3226err_enable_qp:
3227 disable_delayed_refill(vi);
3228 cancel_delayed_work_sync(dwork: &vi->refill);
3229
3230 for (i--; i >= 0; i--) {
3231 virtnet_disable_queue_pair(vi, qp_index: i);
3232 virtnet_cancel_dim(vi, dim: &vi->rq[i].dim);
3233 }
3234
3235 return err;
3236}
3237
3238static int virtnet_poll_tx(struct napi_struct *napi, int budget)
3239{
3240 struct send_queue *sq = container_of(napi, struct send_queue, napi);
3241 struct virtnet_info *vi = sq->vq->vdev->priv;
3242 unsigned int index = vq2txq(vq: sq->vq);
3243 struct netdev_queue *txq;
3244 int opaque, xsk_done = 0;
3245 bool done;
3246
3247 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
3248 /* We don't need to enable cb for XDP */
3249 napi_complete_done(n: napi, work_done: 0);
3250 return 0;
3251 }
3252
3253 txq = netdev_get_tx_queue(dev: vi->dev, index);
3254 __netif_tx_lock(txq, raw_smp_processor_id());
3255 virtqueue_disable_cb(vq: sq->vq);
3256
3257 if (sq->xsk_pool)
3258 xsk_done = virtnet_xsk_xmit(sq, pool: sq->xsk_pool, budget);
3259 else
3260 free_old_xmit(sq, txq, in_napi: !!budget);
3261
3262 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 &&
3263 netif_tx_queue_stopped(dev_queue: txq)) {
3264 u64_stats_update_begin(syncp: &sq->stats.syncp);
3265 u64_stats_inc(p: &sq->stats.wake);
3266 u64_stats_update_end(syncp: &sq->stats.syncp);
3267 netif_tx_wake_queue(dev_queue: txq);
3268 }
3269
3270 if (xsk_done >= budget) {
3271 __netif_tx_unlock(txq);
3272 return budget;
3273 }
3274
3275 opaque = virtqueue_enable_cb_prepare(vq: sq->vq);
3276
3277 done = napi_complete_done(n: napi, work_done: 0);
3278
3279 if (!done)
3280 virtqueue_disable_cb(vq: sq->vq);
3281
3282 __netif_tx_unlock(txq);
3283
3284 if (done) {
3285 if (unlikely(virtqueue_poll(sq->vq, opaque))) {
3286 if (napi_schedule_prep(n: napi)) {
3287 __netif_tx_lock(txq, raw_smp_processor_id());
3288 virtqueue_disable_cb(vq: sq->vq);
3289 __netif_tx_unlock(txq);
3290 __napi_schedule(n: napi);
3291 }
3292 }
3293 }
3294
3295 return 0;
3296}
3297
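/* Build the scatterlist for an skb and queue it on the send virtqueue.
 * When the device accepts any header layout (any_header_sg) and the skb has
 * suitable headroom and alignment, the virtio-net header is pushed into the
 * skb data; otherwise the out-of-band header area is used as a separate sg
 * entry.
 */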
3298static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
3299{
3300 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
3301 struct virtnet_info *vi = sq->vq->vdev->priv;
3302 struct virtio_net_hdr_v1_hash_tunnel *hdr;
3303 int num_sg;
3304 unsigned hdr_len = vi->hdr_len;
3305 bool can_push;
3306
3307 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
3308
3309 can_push = vi->any_header_sg &&
3310 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
3311 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
3312 /* Even if we can, don't push here yet as this would skew
3313 * csum_start offset below. */
3314 if (can_push)
3315 hdr = (struct virtio_net_hdr_v1_hash_tunnel *)(skb->data -
3316 hdr_len);
3317 else
3318 hdr = &skb_vnet_common_hdr(skb)->tnl_hdr;
3319
3320 if (virtio_net_hdr_tnl_from_skb(skb, vhdr: hdr, tnl_hdr_negotiated: vi->tx_tnl,
3321 little_endian: virtio_is_little_endian(vdev: vi->vdev), vlan_hlen: 0))
3322 return -EPROTO;
3323
3324 if (vi->mergeable_rx_bufs)
3325 hdr->hash_hdr.hdr.num_buffers = 0;
3326
3327 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
3328 if (can_push) {
3329 __skb_push(skb, len: hdr_len);
3330 num_sg = skb_to_sgvec(skb, sg: sq->sg, offset: 0, len: skb->len);
3331 if (unlikely(num_sg < 0))
3332 return num_sg;
3333 /* Pull header back to avoid skew in tx bytes calculations. */
3334 __skb_pull(skb, len: hdr_len);
3335 } else {
3336 sg_set_buf(sg: sq->sg, buf: hdr, buflen: hdr_len);
3337 num_sg = skb_to_sgvec(skb, sg: sq->sg + 1, offset: 0, len: skb->len);
3338 if (unlikely(num_sg < 0))
3339 return num_sg;
3340 num_sg++;
3341 }
3342
3343 return virtnet_add_outbuf(sq, num: num_sg, data: skb,
3344 type: orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB);
3345}
3346
3347static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
3348{
3349 struct virtnet_info *vi = netdev_priv(dev);
3350 int qnum = skb_get_queue_mapping(skb);
3351 struct send_queue *sq = &vi->sq[qnum];
3352 int err;
3353 struct netdev_queue *txq = netdev_get_tx_queue(dev, index: qnum);
3354 bool xmit_more = netdev_xmit_more();
3355 bool use_napi = sq->napi.weight;
3356 bool kick;
3357
3358 if (!use_napi)
3359 free_old_xmit(sq, txq, in_napi: false);
3360 else
3361 virtqueue_disable_cb(vq: sq->vq);
3362
3363 /* timestamp packet in software */
3364 skb_tx_timestamp(skb);
3365
3366 /* Try to transmit */
3367 err = xmit_skb(sq, skb, orphan: !use_napi);
3368
3369 /* This should not happen! */
3370 if (unlikely(err)) {
3371 DEV_STATS_INC(dev, tx_fifo_errors);
3372 if (net_ratelimit())
3373 dev_warn(&dev->dev,
3374 "Unexpected TXQ (%d) queue failure: %d\n",
3375 qnum, err);
3376 DEV_STATS_INC(dev, tx_dropped);
3377 dev_kfree_skb_any(skb);
3378 return NETDEV_TX_OK;
3379 }
3380
3381 /* Don't wait up for transmitted skbs to be freed. */
3382 if (!use_napi) {
3383 skb_orphan(skb);
3384 nf_reset_ct(skb);
3385 }
3386
3387 if (use_napi)
3388 tx_may_stop(vi, dev, sq);
3389 else
		check_sq_full_and_disable(vi, dev, sq);
3391
3392 kick = use_napi ? __netdev_tx_sent_queue(dev_queue: txq, bytes: skb->len, xmit_more) :
3393 !xmit_more || netif_xmit_stopped(dev_queue: txq);
3394 if (kick) {
3395 if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq)) {
3396 u64_stats_update_begin(syncp: &sq->stats.syncp);
3397 u64_stats_inc(p: &sq->stats.kicks);
3398 u64_stats_update_end(syncp: &sq->stats.syncp);
3399 }
3400 }
3401
3402 if (use_napi && kick && unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
3403 virtqueue_napi_schedule(napi: &sq->napi, vq: sq->vq);
3404
3405 return NETDEV_TX_OK;
3406}
3407
3408static void __virtnet_rx_pause(struct virtnet_info *vi,
3409 struct receive_queue *rq)
3410{
3411 bool running = netif_running(dev: vi->dev);
3412
3413 if (running) {
3414 virtnet_napi_disable(rq);
3415 virtnet_cancel_dim(vi, dim: &rq->dim);
3416 }
3417}
3418
3419static void virtnet_rx_pause_all(struct virtnet_info *vi)
3420{
3421 int i;
3422
3423 /*
3424 * Make sure refill_work does not run concurrently to
3425 * avoid napi_disable race which leads to deadlock.
3426 */
3427 disable_delayed_refill(vi);
3428 cancel_delayed_work_sync(dwork: &vi->refill);
3429 for (i = 0; i < vi->max_queue_pairs; i++)
3430 __virtnet_rx_pause(vi, rq: &vi->rq[i]);
3431}
3432
3433static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
3434{
3435 /*
3436 * Make sure refill_work does not run concurrently to
3437 * avoid napi_disable race which leads to deadlock.
3438 */
3439 disable_delayed_refill(vi);
3440 cancel_delayed_work_sync(dwork: &vi->refill);
3441 __virtnet_rx_pause(vi, rq);
3442}
3443
3444static void __virtnet_rx_resume(struct virtnet_info *vi,
3445 struct receive_queue *rq,
3446 bool refill)
3447{
3448 bool running = netif_running(dev: vi->dev);
3449 bool schedule_refill = false;
3450
3451 if (refill && !try_fill_recv(vi, rq, GFP_KERNEL))
3452 schedule_refill = true;
3453 if (running)
3454 virtnet_napi_enable(rq);
3455
3456 if (schedule_refill)
3457 schedule_delayed_work(dwork: &vi->refill, delay: 0);
3458}
3459
3460static void virtnet_rx_resume_all(struct virtnet_info *vi)
3461{
3462 int i;
3463
3464 enable_delayed_refill(vi);
3465 for (i = 0; i < vi->max_queue_pairs; i++) {
3466 if (i < vi->curr_queue_pairs)
3467 __virtnet_rx_resume(vi, rq: &vi->rq[i], refill: true);
3468 else
3469 __virtnet_rx_resume(vi, rq: &vi->rq[i], refill: false);
3470 }
3471}
3472
3473static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
3474{
3475 enable_delayed_refill(vi);
3476 __virtnet_rx_resume(vi, rq, refill: true);
3477}
3478
3479static int virtnet_rx_resize(struct virtnet_info *vi,
3480 struct receive_queue *rq, u32 ring_num)
3481{
3482 int err, qindex;
3483
3484 qindex = rq - vi->rq;
3485
3486 virtnet_rx_pause(vi, rq);
3487
3488 err = virtqueue_resize(vq: rq->vq, num: ring_num, recycle: virtnet_rq_unmap_free_buf, NULL);
3489 if (err)
3490 netdev_err(dev: vi->dev, format: "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
3491
3492 virtnet_rx_resume(vi, rq);
3493 return err;
3494}
3495
3496static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
3497{
3498 bool running = netif_running(dev: vi->dev);
3499 struct netdev_queue *txq;
3500 int qindex;
3501
3502 qindex = sq - vi->sq;
3503
3504 if (running)
3505 virtnet_napi_tx_disable(sq);
3506
3507 txq = netdev_get_tx_queue(dev: vi->dev, index: qindex);
3508
	/* 1. wait for all in-flight xmit to complete
	 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
	 */
	__netif_tx_lock_bh(txq);

	/* Prevent rx poll from accessing sq. */
	sq->reset = true;

	/* Prevent the upper layer from trying to send packets. */
	netif_stop_subqueue(vi->dev, qindex);
3519
3520 __netif_tx_unlock_bh(txq);
3521}
3522
3523static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
3524{
3525 bool running = netif_running(dev: vi->dev);
3526 struct netdev_queue *txq;
3527 int qindex;
3528
3529 qindex = sq - vi->sq;
3530
3531 txq = netdev_get_tx_queue(dev: vi->dev, index: qindex);
3532
3533 __netif_tx_lock_bh(txq);
3534 sq->reset = false;
3535 netif_tx_wake_queue(dev_queue: txq);
3536 __netif_tx_unlock_bh(txq);
3537
3538 if (running)
3539 virtnet_napi_tx_enable(sq);
3540}
3541
3542static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
3543 u32 ring_num)
3544{
3545 int qindex, err;
3546
	if (ring_num <= MAX_SKB_FRAGS + 2) {
		netdev_err(vi->dev, "tx size (%d) must be larger than %d\n",
			   ring_num, MAX_SKB_FRAGS + 2);
3550 return -EINVAL;
3551 }
3552
3553 qindex = sq - vi->sq;
3554
3555 virtnet_tx_pause(vi, sq);
3556
3557 err = virtqueue_resize(vq: sq->vq, num: ring_num, recycle: virtnet_sq_free_unused_buf,
3558 recycle_done: virtnet_sq_free_unused_buf_done);
3559 if (err)
3560 netdev_err(dev: vi->dev, format: "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
3561
3562 virtnet_tx_resume(vi, sq);
3563
3564 return err;
3565}
3566
3567/*
3568 * Send command via the control virtqueue and check status. Commands
3569 * supported by the hypervisor, as indicated by feature bits, should
3570 * never fail unless improperly formatted.
3571 */
3572static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd,
3573 struct scatterlist *out,
3574 struct scatterlist *in)
3575{
3576 struct scatterlist *sgs[5], hdr, stat;
3577 u32 out_num = 0, tmp, in_num = 0;
3578 bool ok;
3579 int ret;
3580
3581 /* Caller should know better */
3582 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
3583
3584 mutex_lock(lock: &vi->cvq_lock);
3585 vi->ctrl->status = ~0;
3586 vi->ctrl->hdr.class = class;
3587 vi->ctrl->hdr.cmd = cmd;
3588 /* Add header */
3589 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
3590 sgs[out_num++] = &hdr;
3591
3592 if (out)
3593 sgs[out_num++] = out;
3594
3595 /* Add return status. */
3596 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
3597 sgs[out_num + in_num++] = &stat;
3598
3599 if (in)
3600 sgs[out_num + in_num++] = in;
3601
3602 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
3603 ret = virtqueue_add_sgs(vq: vi->cvq, sgs, out_sgs: out_num, in_sgs: in_num, data: vi, GFP_ATOMIC);
3604 if (ret < 0) {
3605 dev_warn(&vi->vdev->dev,
			 "Failed to add sgs for command vq: %d\n", ret);
		mutex_unlock(&vi->cvq_lock);
3608 return false;
3609 }
3610
3611 if (unlikely(!virtqueue_kick(vi->cvq)))
3612 goto unlock;
3613
3614 /* Spin for a response, the kick causes an ioport write, trapping
3615 * into the hypervisor, so the request should be handled immediately.
3616 */
3617 while (!virtqueue_get_buf(vq: vi->cvq, len: &tmp) &&
3618 !virtqueue_is_broken(vq: vi->cvq)) {
3619 cond_resched();
3620 cpu_relax();
3621 }
3622
3623unlock:
3624 ok = vi->ctrl->status == VIRTIO_NET_OK;
3625 mutex_unlock(lock: &vi->cvq_lock);
3626 return ok;
3627}
3628
3629static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
3630 struct scatterlist *out)
3631{
3632 return virtnet_send_command_reply(vi, class, cmd, out, NULL);
3633}
3634
3635static int virtnet_set_mac_address(struct net_device *dev, void *p)
3636{
3637 struct virtnet_info *vi = netdev_priv(dev);
3638 struct virtio_device *vdev = vi->vdev;
3639 int ret;
3640 struct sockaddr *addr;
3641 struct scatterlist sg;
3642
3643 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_STANDBY))
3644 return -EOPNOTSUPP;
3645
3646 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
3647 if (!addr)
3648 return -ENOMEM;
3649
3650 ret = eth_prepare_mac_addr_change(dev, p: addr);
3651 if (ret)
3652 goto out;
3653
3654 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
3655 sg_init_one(&sg, addr->sa_data, dev->addr_len);
3656 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
3657 VIRTIO_NET_CTRL_MAC_ADDR_SET, out: &sg)) {
3658 dev_warn(&vdev->dev,
3659 "Failed to set mac address by vq command.\n");
3660 ret = -EINVAL;
3661 goto out;
3662 }
3663 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
3664 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3665 unsigned int i;
3666
3667 /* Naturally, this has an atomicity problem. */
3668 for (i = 0; i < dev->addr_len; i++)
3669 virtio_cwrite8(vdev,
3670 offsetof(struct virtio_net_config, mac) +
3671 i, val: addr->sa_data[i]);
3672 }
3673
3674 eth_commit_mac_addr_change(dev, p);
3675 ret = 0;
3676
3677out:
3678 kfree(objp: addr);
3679 return ret;
3680}
3681
3682static void virtnet_stats(struct net_device *dev,
3683 struct rtnl_link_stats64 *tot)
3684{
3685 struct virtnet_info *vi = netdev_priv(dev);
3686 unsigned int start;
3687 int i;
3688
3689 for (i = 0; i < vi->max_queue_pairs; i++) {
3690 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
3691 struct receive_queue *rq = &vi->rq[i];
3692 struct send_queue *sq = &vi->sq[i];
3693
3694 do {
3695 start = u64_stats_fetch_begin(syncp: &sq->stats.syncp);
3696 tpackets = u64_stats_read(p: &sq->stats.packets);
3697 tbytes = u64_stats_read(p: &sq->stats.bytes);
3698 terrors = u64_stats_read(p: &sq->stats.tx_timeouts);
3699 } while (u64_stats_fetch_retry(syncp: &sq->stats.syncp, start));
3700
3701 do {
3702 start = u64_stats_fetch_begin(syncp: &rq->stats.syncp);
3703 rpackets = u64_stats_read(p: &rq->stats.packets);
3704 rbytes = u64_stats_read(p: &rq->stats.bytes);
3705 rdrops = u64_stats_read(p: &rq->stats.drops);
3706 } while (u64_stats_fetch_retry(syncp: &rq->stats.syncp, start));
3707
3708 tot->rx_packets += rpackets;
3709 tot->tx_packets += tpackets;
3710 tot->rx_bytes += rbytes;
3711 tot->tx_bytes += tbytes;
3712 tot->rx_dropped += rdrops;
3713 tot->tx_errors += terrors;
3714 }
3715
3716 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
3717 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors);
3718 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors);
3719 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors);
3720}
3721
3722static void virtnet_ack_link_announce(struct virtnet_info *vi)
3723{
3724 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
3725 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
3726 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
3727}
3728
3729static bool virtnet_commit_rss_command(struct virtnet_info *vi);
3730
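/* Rebuild the RSS indirection table with the default round-robin spread over
 * the new number of queue pairs and record that count in max_tx_vq.
 */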
3731static void virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs)
3732{
3733 u32 indir_val = 0;
3734 int i = 0;
3735
3736 for (; i < vi->rss_indir_table_size; ++i) {
3737 indir_val = ethtool_rxfh_indir_default(index: i, n_rx_rings: queue_pairs);
3738 vi->rss_hdr->indirection_table[i] = cpu_to_le16(indir_val);
3739 }
3740 vi->rss_trailer.max_tx_vq = cpu_to_le16(queue_pairs);
3741}
3742
3743static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
3744{
3745 struct virtio_net_ctrl_mq *mq __free(kfree) = NULL;
3746 struct virtio_net_rss_config_hdr *old_rss_hdr;
3747 struct virtio_net_rss_config_trailer old_rss_trailer;
3748 struct net_device *dev = vi->dev;
3749 struct scatterlist sg;
3750
3751 if (!vi->has_cvq || !virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_MQ))
3752 return 0;
3753
	/* First check whether we need to update RSS. Do the update only if
	 * (1) RSS is enabled and (2) there is no user configuration.
	 *
	 * During RSS command processing, the device updates queue_pairs using
	 * rss.max_tx_vq. That is, the device updates queue_pairs together with
	 * RSS, so we can skip the separate queue_pairs update
	 * (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly.
	 */
3761 if (vi->has_rss && !netif_is_rxfh_configured(dev)) {
3762 old_rss_hdr = vi->rss_hdr;
3763 old_rss_trailer = vi->rss_trailer;
3764 vi->rss_hdr = devm_kzalloc(dev: &dev->dev, size: virtnet_rss_hdr_size(vi), GFP_KERNEL);
3765 if (!vi->rss_hdr) {
3766 vi->rss_hdr = old_rss_hdr;
3767 return -ENOMEM;
3768 }
3769
3770 *vi->rss_hdr = *old_rss_hdr;
3771 virtnet_rss_update_by_qpairs(vi, queue_pairs);
3772
3773 if (!virtnet_commit_rss_command(vi)) {
3774 /* restore ctrl_rss if commit_rss_command failed */
3775 devm_kfree(dev: &dev->dev, p: vi->rss_hdr);
3776 vi->rss_hdr = old_rss_hdr;
3777 vi->rss_trailer = old_rss_trailer;
3778
3779 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n",
3780 queue_pairs);
3781 return -EINVAL;
3782 }
3783 devm_kfree(dev: &dev->dev, p: old_rss_hdr);
3784 goto succ;
3785 }
3786
3787 mq = kzalloc(sizeof(*mq), GFP_KERNEL);
3788 if (!mq)
3789 return -ENOMEM;
3790
3791 mq->virtqueue_pairs = cpu_to_virtio16(vdev: vi->vdev, val: queue_pairs);
3792 sg_init_one(&sg, mq, sizeof(*mq));
3793
3794 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
3795 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, out: &sg)) {
3796 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
3797 queue_pairs);
3798 return -EINVAL;
3799 }
3800succ:
3801 vi->curr_queue_pairs = queue_pairs;
	/* virtnet_open() will refill when the device is brought up. */
	spin_lock_bh(&vi->refill_lock);
	if (dev->flags & IFF_UP && vi->refill_enabled)
		schedule_delayed_work(&vi->refill, 0);
	spin_unlock_bh(&vi->refill_lock);
3807
3808 return 0;
3809}
3810
3811static int virtnet_close(struct net_device *dev)
3812{
3813 struct virtnet_info *vi = netdev_priv(dev);
3814 int i;
3815
3816 /* Make sure NAPI doesn't schedule refill work */
3817 disable_delayed_refill(vi);
3818 /* Make sure refill_work doesn't re-enable napi! */
3819 cancel_delayed_work_sync(dwork: &vi->refill);
3820 /* Prevent the config change callback from changing carrier
3821 * after close
3822 */
3823 virtio_config_driver_disable(dev: vi->vdev);
3824 /* Stop getting status/speed updates: we don't care until next
3825 * open
3826 */
3827 cancel_work_sync(work: &vi->config_work);
3828
3829 for (i = 0; i < vi->max_queue_pairs; i++) {
3830 virtnet_disable_queue_pair(vi, qp_index: i);
3831 virtnet_cancel_dim(vi, dim: &vi->rq[i].dim);
3832 }
3833
3834 netif_carrier_off(dev);
3835
3836 return 0;
3837}
3838
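/* Deferred ndo_set_rx_mode handler: pushes the promisc/allmulti flags and the
 * unicast/multicast MAC filter lists to the device over the ctrl vq.
 */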
3839static void virtnet_rx_mode_work(struct work_struct *work)
3840{
3841 struct virtnet_info *vi =
3842 container_of(work, struct virtnet_info, rx_mode_work);
3843 u8 *promisc_allmulti __free(kfree) = NULL;
3844 struct net_device *dev = vi->dev;
3845 struct scatterlist sg[2];
3846 struct virtio_net_ctrl_mac *mac_data;
3847 struct netdev_hw_addr *ha;
3848 int uc_count;
3849 int mc_count;
3850 void *buf;
3851 int i;
3852
3853 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
3854 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_CTRL_RX))
3855 return;
3856
3857 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL);
3858 if (!promisc_allmulti) {
3859 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n");
3860 return;
3861 }
3862
3863 rtnl_lock();
3864
3865 *promisc_allmulti = !!(dev->flags & IFF_PROMISC);
3866 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));
3867
3868 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
3869 VIRTIO_NET_CTRL_RX_PROMISC, out: sg))
3870 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
3871 *promisc_allmulti ? "en" : "dis");
3872
3873 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI);
3874 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));
3875
3876 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
3877 VIRTIO_NET_CTRL_RX_ALLMULTI, out: sg))
3878 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
3879 *promisc_allmulti ? "en" : "dis");
3880
3881 netif_addr_lock_bh(dev);
3882
3883 uc_count = netdev_uc_count(dev);
3884 mc_count = netdev_mc_count(dev);
3885 /* MAC filter - use one buffer for both lists */
3886 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
3887 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
3888 mac_data = buf;
3889 if (!buf) {
3890 netif_addr_unlock_bh(dev);
3891 rtnl_unlock();
3892 return;
3893 }
3894
3895 sg_init_table(sg, 2);
3896
3897 /* Store the unicast list and count in the front of the buffer */
3898 mac_data->entries = cpu_to_virtio32(vdev: vi->vdev, val: uc_count);
3899 i = 0;
3900 netdev_for_each_uc_addr(ha, dev)
3901 memcpy(to: &mac_data->macs[i++][0], from: ha->addr, ETH_ALEN);
3902
3903 sg_set_buf(sg: &sg[0], buf: mac_data,
3904 buflen: sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
3905
3906 /* multicast list and count fill the end */
3907 mac_data = (void *)&mac_data->macs[uc_count][0];
3908
3909 mac_data->entries = cpu_to_virtio32(vdev: vi->vdev, val: mc_count);
3910 i = 0;
3911 netdev_for_each_mc_addr(ha, dev)
3912 memcpy(to: &mac_data->macs[i++][0], from: ha->addr, ETH_ALEN);
3913
3914 netif_addr_unlock_bh(dev);
3915
3916 sg_set_buf(sg: &sg[1], buf: mac_data,
3917 buflen: sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
3918
3919 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
3920 VIRTIO_NET_CTRL_MAC_TABLE_SET, out: sg))
3921 dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
3922
3923 rtnl_unlock();
3924
3925 kfree(objp: buf);
3926}
3927
3928static void virtnet_set_rx_mode(struct net_device *dev)
3929{
3930 struct virtnet_info *vi = netdev_priv(dev);
3931
3932 if (vi->rx_mode_work_enabled)
3933 schedule_work(work: &vi->rx_mode_work);
3934}
3935
3936static int virtnet_vlan_rx_add_vid(struct net_device *dev,
3937 __be16 proto, u16 vid)
3938{
3939 struct virtnet_info *vi = netdev_priv(dev);
3940 __virtio16 *_vid __free(kfree) = NULL;
3941 struct scatterlist sg;
3942
3943 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL);
3944 if (!_vid)
3945 return -ENOMEM;
3946
3947 *_vid = cpu_to_virtio16(vdev: vi->vdev, val: vid);
3948 sg_init_one(&sg, _vid, sizeof(*_vid));
3949
3950 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
3951 VIRTIO_NET_CTRL_VLAN_ADD, out: &sg))
3952 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
3953 return 0;
3954}
3955
3956static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
3957 __be16 proto, u16 vid)
3958{
3959 struct virtnet_info *vi = netdev_priv(dev);
3960 __virtio16 *_vid __free(kfree) = NULL;
3961 struct scatterlist sg;
3962
3963 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL);
3964 if (!_vid)
3965 return -ENOMEM;
3966
3967 *_vid = cpu_to_virtio16(vdev: vi->vdev, val: vid);
3968 sg_init_one(&sg, _vid, sizeof(*_vid));
3969
3970 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
3971 VIRTIO_NET_CTRL_VLAN_DEL, out: &sg))
3972 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
3973 return 0;
3974}
3975
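/* Drop any previously set virtqueue CPU affinity hints. */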
3976static void virtnet_clean_affinity(struct virtnet_info *vi)
3977{
3978 int i;
3979
3980 if (vi->affinity_hint_set) {
3981 for (i = 0; i < vi->max_queue_pairs; i++) {
3982 virtqueue_set_affinity(vq: vi->rq[i].vq, NULL);
3983 virtqueue_set_affinity(vq: vi->sq[i].vq, NULL);
3984 }
3985
3986 vi->affinity_hint_set = false;
3987 }
3988}
3989
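/* Spread the online CPUs across the active queue pairs: each pair gets a
 * contiguous group of roughly num_cpu / curr_queue_pairs CPUs (the remainder
 * is handed out one extra CPU per leading pair), and the same mask is used
 * for both the virtqueue affinity hints and the XPS mapping.
 */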
3990static void virtnet_set_affinity(struct virtnet_info *vi)
3991{
3992 cpumask_var_t mask;
3993 int stragglers;
3994 int group_size;
3995 int i, start = 0, cpu;
3996 int num_cpu;
3997 int stride;
3998
3999 if (!zalloc_cpumask_var(mask: &mask, GFP_KERNEL)) {
4000 virtnet_clean_affinity(vi);
4001 return;
4002 }
4003
4004 num_cpu = num_online_cpus();
4005 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
4006 stragglers = num_cpu >= vi->curr_queue_pairs ?
4007 num_cpu % vi->curr_queue_pairs :
4008 0;
4009
4010 for (i = 0; i < vi->curr_queue_pairs; i++) {
4011 group_size = stride + (i < stragglers ? 1 : 0);
4012
4013 for_each_online_cpu_wrap(cpu, start) {
4014 if (!group_size--) {
4015 start = cpu;
4016 break;
4017 }
4018 cpumask_set_cpu(cpu, dstp: mask);
4019 }
4020
4021 virtqueue_set_affinity(vq: vi->rq[i].vq, cpu_mask: mask);
4022 virtqueue_set_affinity(vq: vi->sq[i].vq, cpu_mask: mask);
4023 __netif_set_xps_queue(dev: vi->dev, cpumask_bits(mask), index: i, type: XPS_CPUS);
4024 cpumask_clear(dstp: mask);
4025 }
4026
4027 vi->affinity_hint_set = true;
4028 free_cpumask_var(mask);
4029}
4030
4031static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
4032{
4033 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
4034 node);
4035 virtnet_set_affinity(vi);
4036 return 0;
4037}
4038
4039static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
4040{
4041 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
4042 node_dead);
4043 virtnet_set_affinity(vi);
4044 return 0;
4045}
4046
4047static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
4048{
4049 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
4050 node);
4051
4052 virtnet_clean_affinity(vi);
4053 return 0;
4054}
4055
4056static enum cpuhp_state virtionet_online;
4057
4058static int virtnet_cpu_notif_add(struct virtnet_info *vi)
4059{
4060 int ret;
4061
4062 ret = cpuhp_state_add_instance_nocalls(state: virtionet_online, node: &vi->node);
4063 if (ret)
4064 return ret;
4065 ret = cpuhp_state_add_instance_nocalls(state: CPUHP_VIRT_NET_DEAD,
4066 node: &vi->node_dead);
4067 if (!ret)
4068 return ret;
4069 cpuhp_state_remove_instance_nocalls(state: virtionet_online, node: &vi->node);
4070 return ret;
4071}
4072
4073static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
4074{
4075 cpuhp_state_remove_instance_nocalls(state: virtionet_online, node: &vi->node);
4076 cpuhp_state_remove_instance_nocalls(state: CPUHP_VIRT_NET_DEAD,
4077 node: &vi->node_dead);
4078}
4079
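/* Send VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET to configure the interrupt coalescing
 * parameters of a single virtqueue.
 */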
4080static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi,
4081 u16 vqn, u32 max_usecs, u32 max_packets)
4082{
4083 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL;
4084 struct scatterlist sgs;
4085
4086 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL);
4087 if (!coal_vq)
4088 return -ENOMEM;
4089
4090 coal_vq->vqn = cpu_to_le16(vqn);
4091 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs);
4092 coal_vq->coal.max_packets = cpu_to_le32(max_packets);
4093 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq));
4094
4095 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
4096 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
4097 out: &sgs))
4098 return -EINVAL;
4099
4100 return 0;
4101}
4102
4103static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
4104 u16 queue, u32 max_usecs,
4105 u32 max_packets)
4106{
4107 int err;
4108
4109 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
4110 return -EOPNOTSUPP;
4111
4112 err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: rxq2vq(rxq: queue),
4113 max_usecs, max_packets);
4114 if (err)
4115 return err;
4116
4117 vi->rq[queue].intr_coal.max_usecs = max_usecs;
4118 vi->rq[queue].intr_coal.max_packets = max_packets;
4119
4120 return 0;
4121}
4122
4123static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
4124 u16 queue, u32 max_usecs,
4125 u32 max_packets)
4126{
4127 int err;
4128
4129 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
4130 return -EOPNOTSUPP;
4131
4132 err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: txq2vq(txq: queue),
4133 max_usecs, max_packets);
4134 if (err)
4135 return err;
4136
4137 vi->sq[queue].intr_coal.max_usecs = max_usecs;
4138 vi->sq[queue].intr_coal.max_packets = max_packets;
4139
4140 return 0;
4141}
4142
4143static void virtnet_get_ringparam(struct net_device *dev,
4144 struct ethtool_ringparam *ring,
4145 struct kernel_ethtool_ringparam *kernel_ring,
4146 struct netlink_ext_ack *extack)
4147{
4148 struct virtnet_info *vi = netdev_priv(dev);
4149
4150 ring->rx_max_pending = vi->rq[0].vq->num_max;
4151 ring->tx_max_pending = vi->sq[0].vq->num_max;
4152 ring->rx_pending = virtqueue_get_vring_size(vq: vi->rq[0].vq);
4153 ring->tx_pending = virtqueue_get_vring_size(vq: vi->sq[0].vq);
4154}
4155
4156static int virtnet_set_ringparam(struct net_device *dev,
4157 struct ethtool_ringparam *ring,
4158 struct kernel_ethtool_ringparam *kernel_ring,
4159 struct netlink_ext_ack *extack)
4160{
4161 struct virtnet_info *vi = netdev_priv(dev);
4162 u32 rx_pending, tx_pending;
4163 struct receive_queue *rq;
4164 struct send_queue *sq;
4165 int i, err;
4166
4167 if (ring->rx_mini_pending || ring->rx_jumbo_pending)
4168 return -EINVAL;
4169
4170 rx_pending = virtqueue_get_vring_size(vq: vi->rq[0].vq);
4171 tx_pending = virtqueue_get_vring_size(vq: vi->sq[0].vq);
4172
4173 if (ring->rx_pending == rx_pending &&
4174 ring->tx_pending == tx_pending)
4175 return 0;
4176
4177 if (ring->rx_pending > vi->rq[0].vq->num_max)
4178 return -EINVAL;
4179
4180 if (ring->tx_pending > vi->sq[0].vq->num_max)
4181 return -EINVAL;
4182
4183 for (i = 0; i < vi->max_queue_pairs; i++) {
4184 rq = vi->rq + i;
4185 sq = vi->sq + i;
4186
4187 if (ring->tx_pending != tx_pending) {
4188 err = virtnet_tx_resize(vi, sq, ring_num: ring->tx_pending);
4189 if (err)
4190 return err;
4191
4192 /* Upon disabling and re-enabling a transmit virtqueue, the device must
4193 * set the coalescing parameters of the virtqueue to those configured
4194 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver
4195 * did not set any TX coalescing parameters, to 0.
4196 */
4197 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue: i,
4198 max_usecs: vi->intr_coal_tx.max_usecs,
4199 max_packets: vi->intr_coal_tx.max_packets);
4200
4201 /* Don't break the tx resize action if the vq coalescing is not
4202 * supported. The same is true for rx resize below.
4203 */
4204 if (err && err != -EOPNOTSUPP)
4205 return err;
4206 }
4207
4208 if (ring->rx_pending != rx_pending) {
4209 err = virtnet_rx_resize(vi, rq, ring_num: ring->rx_pending);
4210 if (err)
4211 return err;
4212
4213 /* The reason is the same as for the transmit virtqueue reset above */
4214 mutex_lock(lock: &vi->rq[i].dim_lock);
4215 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue: i,
4216 max_usecs: vi->intr_coal_rx.max_usecs,
4217 max_packets: vi->intr_coal_rx.max_packets);
4218 mutex_unlock(lock: &vi->rq[i].dim_lock);
4219 if (err && err != -EOPNOTSUPP)
4220 return err;
4221 }
4222 }
4223
4224 return 0;
4225}
4226
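/* Push the current RSS/hash configuration (header and trailer, i.e. hash
 * types, indirection table and key) to the device over the ctrl vq.
 */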
4227static bool virtnet_commit_rss_command(struct virtnet_info *vi)
4228{
4229 struct net_device *dev = vi->dev;
4230 struct scatterlist sgs[2];
4231
4232 /* prepare sgs */
4233 sg_init_table(sgs, 2);
4234 sg_set_buf(sg: &sgs[0], buf: vi->rss_hdr, buflen: virtnet_rss_hdr_size(vi));
4235 sg_set_buf(sg: &sgs[1], buf: &vi->rss_trailer, buflen: virtnet_rss_trailer_size(vi));
4236
4237 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
4238 cmd: vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
4239 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, out: sgs))
4240 goto err;
4241
4242 return true;
4243
4244err:
4245 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
4246 return false;
4247
4248}
4249
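/* Initialize the RSS state to its defaults: enable all supported hash types,
 * spread the indirection table across the current queue pairs and generate a
 * random hash key.
 */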
4250static void virtnet_init_default_rss(struct virtnet_info *vi)
4251{
4252 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_supported);
4253 vi->rss_hash_types_saved = vi->rss_hash_types_supported;
4254 vi->rss_hdr->indirection_table_mask = vi->rss_indir_table_size
4255 ? cpu_to_le16(vi->rss_indir_table_size - 1) : 0;
4256 vi->rss_hdr->unclassified_queue = 0;
4257
4258 virtnet_rss_update_by_qpairs(vi, queue_pairs: vi->curr_queue_pairs);
4259
4260 vi->rss_trailer.hash_key_length = vi->rss_key_size;
4261
4262 netdev_rss_key_fill(buffer: vi->rss_hash_key_data, len: vi->rss_key_size);
4263}
4264
4265static int virtnet_get_hashflow(struct net_device *dev,
4266 struct ethtool_rxfh_fields *info)
4267{
4268 struct virtnet_info *vi = netdev_priv(dev);
4269
4270 info->data = 0;
4271 switch (info->flow_type) {
4272 case TCP_V4_FLOW:
4273 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
4274 info->data = RXH_IP_SRC | RXH_IP_DST |
4275 RXH_L4_B_0_1 | RXH_L4_B_2_3;
4276 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
4277 info->data = RXH_IP_SRC | RXH_IP_DST;
4278 }
4279 break;
4280 case TCP_V6_FLOW:
4281 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
4282 info->data = RXH_IP_SRC | RXH_IP_DST |
4283 RXH_L4_B_0_1 | RXH_L4_B_2_3;
4284 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
4285 info->data = RXH_IP_SRC | RXH_IP_DST;
4286 }
4287 break;
4288 case UDP_V4_FLOW:
4289 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
4290 info->data = RXH_IP_SRC | RXH_IP_DST |
4291 RXH_L4_B_0_1 | RXH_L4_B_2_3;
4292 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
4293 info->data = RXH_IP_SRC | RXH_IP_DST;
4294 }
4295 break;
4296 case UDP_V6_FLOW:
4297 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
4298 info->data = RXH_IP_SRC | RXH_IP_DST |
4299 RXH_L4_B_0_1 | RXH_L4_B_2_3;
4300 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
4301 info->data = RXH_IP_SRC | RXH_IP_DST;
4302 }
4303 break;
4304 case IPV4_FLOW:
4305 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4)
4306 info->data = RXH_IP_SRC | RXH_IP_DST;
4307
4308 break;
4309 case IPV6_FLOW:
4310 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6)
4311 info->data = RXH_IP_SRC | RXH_IP_DST;
4312
4313 break;
4314 default:
4315 info->data = 0;
4316 break;
4317 }
4318
4319 return 0;
4320}
4321
4322static int virtnet_set_hashflow(struct net_device *dev,
4323 const struct ethtool_rxfh_fields *info,
4324 struct netlink_ext_ack *extack)
4325{
4326 struct virtnet_info *vi = netdev_priv(dev);
4327 u32 new_hashtypes = vi->rss_hash_types_saved;
4328 bool is_disable = info->data & RXH_DISCARD;
4329 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3);
4330
4331 /* supports only 'sd', 'sdfn' and 'r' */
4332 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable))
4333 return -EINVAL;
4334
4335 switch (info->flow_type) {
4336 case TCP_V4_FLOW:
4337 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4);
4338 if (!is_disable)
4339 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
4340 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0);
4341 break;
4342 case UDP_V4_FLOW:
4343 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4);
4344 if (!is_disable)
4345 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
4346 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0);
4347 break;
4348 case IPV4_FLOW:
4349 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4;
4350 if (!is_disable)
4351 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4;
4352 break;
4353 case TCP_V6_FLOW:
4354 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6);
4355 if (!is_disable)
4356 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
4357 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0);
4358 break;
4359 case UDP_V6_FLOW:
4360 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6);
4361 if (!is_disable)
4362 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
4363 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0);
4364 break;
4365 case IPV6_FLOW:
4366 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6;
4367 if (!is_disable)
4368 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6;
4369 break;
4370 default:
4371 /* unsupported flow */
4372 return -EINVAL;
4373 }
4374
4375 /* if unsupported hashtype was set */
4376 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported))
4377 return -EINVAL;
4378
4379 if (new_hashtypes != vi->rss_hash_types_saved) {
4380 vi->rss_hash_types_saved = new_hashtypes;
4381 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved);
4382 if (vi->dev->features & NETIF_F_RXHASH)
4383 if (!virtnet_commit_rss_command(vi))
4384 return -EINVAL;
4385 }
4386
4387 return 0;
4388}
4389
4390static void virtnet_get_drvinfo(struct net_device *dev,
4391 struct ethtool_drvinfo *info)
4392{
4393 struct virtnet_info *vi = netdev_priv(dev);
4394 struct virtio_device *vdev = vi->vdev;
4395
4396 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
4397 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
4398 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
4399
4400}
4401
4402/* TODO: Eliminate OOO packets during switching */
4403static int virtnet_set_channels(struct net_device *dev,
4404 struct ethtool_channels *channels)
4405{
4406 struct virtnet_info *vi = netdev_priv(dev);
4407 u16 queue_pairs = channels->combined_count;
4408 int err;
4409
4410 /* We don't support separate rx/tx channels.
4411 * We don't allow setting 'other' channels.
4412 */
4413 if (channels->rx_count || channels->tx_count || channels->other_count)
4414 return -EINVAL;
4415
4416 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
4417 return -EINVAL;
4418
4419 /* For now we don't support modifying channels while XDP is loaded.
4420 * Also, when XDP is loaded all RX queues have XDP programs, so we only
4421 * need to check a single RX queue.
4422 */
4423 if (vi->rq[0].xdp_prog)
4424 return -EINVAL;
4425
4426 cpus_read_lock();
4427 err = virtnet_set_queues(vi, queue_pairs);
4428 if (err) {
4429 cpus_read_unlock();
4430 goto err;
4431 }
4432 virtnet_set_affinity(vi);
4433 cpus_read_unlock();
4434
4435 netif_set_real_num_tx_queues(dev, txq: queue_pairs);
4436 netif_set_real_num_rx_queues(dev, rxq: queue_pairs);
4437 err:
4438 return err;
4439}
4440
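/* Emit one formatted stat name per descriptor; @noq_fmt (without a queue
 * index) is used for the queue-independent total fields (qid < 0).
 */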
4441static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt,
4442 int num, int qid, const struct virtnet_stat_desc *desc)
4443{
4444 int i;
4445
4446 if (qid < 0) {
4447 for (i = 0; i < num; ++i)
4448 ethtool_sprintf(data: p, fmt: noq_fmt, desc[i].desc);
4449 } else {
4450 for (i = 0; i < num; ++i)
4451 ethtool_sprintf(data: p, fmt, qid, desc[i].desc);
4452 }
4453}
4454
4455/* qid == -1: for rx/tx queue total field */
4456static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data)
4457{
4458 const struct virtnet_stat_desc *desc;
4459 const char *fmt, *noq_fmt;
4460 u8 *p = *data;
4461 u32 num;
4462
4463 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) {
4464 noq_fmt = "cq_hw_%s";
4465
4466 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
4467 desc = &virtnet_stats_cvq_desc[0];
4468 num = ARRAY_SIZE(virtnet_stats_cvq_desc);
4469
4470 virtnet_stats_sprintf(p: &p, NULL, noq_fmt, num, qid: -1, desc);
4471 }
4472 }
4473
4474 if (type == VIRTNET_Q_TYPE_RX) {
4475 fmt = "rx%u_%s";
4476 noq_fmt = "rx_%s";
4477
4478 desc = &virtnet_rq_stats_desc[0];
4479 num = ARRAY_SIZE(virtnet_rq_stats_desc);
4480
4481 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4482
4483 fmt = "rx%u_hw_%s";
4484 noq_fmt = "rx_hw_%s";
4485
4486 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4487 desc = &virtnet_stats_rx_basic_desc[0];
4488 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);
4489
4490 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4491 }
4492
4493 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4494 desc = &virtnet_stats_rx_csum_desc[0];
4495 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);
4496
4497 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4498 }
4499
4500 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4501 desc = &virtnet_stats_rx_speed_desc[0];
4502 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);
4503
4504 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4505 }
4506 }
4507
4508 if (type == VIRTNET_Q_TYPE_TX) {
4509 fmt = "tx%u_%s";
4510 noq_fmt = "tx_%s";
4511
4512 desc = &virtnet_sq_stats_desc[0];
4513 num = ARRAY_SIZE(virtnet_sq_stats_desc);
4514
4515 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4516
4517 fmt = "tx%u_hw_%s";
4518 noq_fmt = "tx_hw_%s";
4519
4520 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4521 desc = &virtnet_stats_tx_basic_desc[0];
4522 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);
4523
4524 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4525 }
4526
4527 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4528 desc = &virtnet_stats_tx_gso_desc[0];
4529 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);
4530
4531 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4532 }
4533
4534 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4535 desc = &virtnet_stats_tx_speed_desc[0];
4536 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);
4537
4538 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4539 }
4540 }
4541
4542 *data = p;
4543}
4544
4545struct virtnet_stats_ctx {
4546 /* Whether the stats are written to qstats (true) or to ethtool -S (false). */
4547 bool to_qstat;
4548
4549 /* Used to calculate the offset inside the output buffer. */
4550 u32 desc_num[3];
4551
4552 /* The actual supported stat types. */
4553 u64 bitmap[3];
4554
4555 /* Used to calculate the reply buffer size. */
4556 u32 size[3];
4557
4558 /* Record the output buffer. */
4559 u64 *data;
4560};
4561
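/* Populate the stats context: per queue type, record how many descriptors go
 * into the output buffer, which device stat types are supported and how large
 * the device reply will be. The layout differs between qstats and ethtool -S.
 */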
4562static void virtnet_stats_ctx_init(struct virtnet_info *vi,
4563 struct virtnet_stats_ctx *ctx,
4564 u64 *data, bool to_qstat)
4565{
4566 u32 queue_type;
4567
4568 ctx->data = data;
4569 ctx->to_qstat = to_qstat;
4570
4571 if (to_qstat) {
4572 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
4573 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);
4574
4575 queue_type = VIRTNET_Q_TYPE_RX;
4576
4577 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4578 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
4579 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
4580 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);
4581 }
4582
4583 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4584 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
4585 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
4586 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);
4587 }
4588
4589 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
4590 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO;
4591 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
4592 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso);
4593 }
4594
4595 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4596 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
4597 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
4598 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);
4599 }
4600
4601 queue_type = VIRTNET_Q_TYPE_TX;
4602
4603 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4604 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
4605 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
4606 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);
4607 }
4608
4609 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
4610 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM;
4611 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
4612 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum);
4613 }
4614
4615 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4616 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
4617 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
4618 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);
4619 }
4620
4621 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4622 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
4623 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
4624 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);
4625 }
4626
4627 return;
4628 }
4629
4630 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc);
4631 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc);
4632
4633 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
4634 queue_type = VIRTNET_Q_TYPE_CQ;
4635
4636 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ;
4637 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_cvq_desc);
4638 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq);
4639 }
4640
4641 queue_type = VIRTNET_Q_TYPE_RX;
4642
4643 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4644 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
4645 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc);
4646 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);
4647 }
4648
4649 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4650 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
4651 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc);
4652 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);
4653 }
4654
4655 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4656 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
4657 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc);
4658 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);
4659 }
4660
4661 queue_type = VIRTNET_Q_TYPE_TX;
4662
4663 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4664 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
4665 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc);
4666 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);
4667 }
4668
4669 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4670 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
4671 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc);
4672 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);
4673 }
4674
4675 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4676 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
4677 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc);
4678 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);
4679 }
4680}
4681
4682/* stats_sum_queue - Sum each field across all send or receive queues.
4683 * @sum: where to store the summed values
4684 * @num: number of fields per queue
4685 * @q_value: pointer to the fields of the first queue
4686 * @q_num: number of queues
4687 */
4688static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num)
4689{
4690 u32 step = num;
4691 int i, j;
4692 u64 *p;
4693
4694 for (i = 0; i < num; ++i) {
4695 p = sum + i;
4696 *p = 0;
4697
4698 for (j = 0; j < q_num; ++j)
4699 *p += *(q_value + i + j * step);
4700 }
4701}
4702
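/* Fill the leading "rx_"/"tx_" total fields of the ethtool -S output by
 * summing the corresponding per-queue counters.
 */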
4703static void virtnet_fill_total_fields(struct virtnet_info *vi,
4704 struct virtnet_stats_ctx *ctx)
4705{
4706 u64 *data, *first_rx_q, *first_tx_q;
4707 u32 num_cq, num_rx, num_tx;
4708
4709 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
4710 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
4711 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];
4712
4713 first_rx_q = ctx->data + num_rx + num_tx + num_cq;
4714 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx;
4715
4716 data = ctx->data;
4717
4718 stats_sum_queue(sum: data, num: num_rx, q_value: first_rx_q, q_num: vi->curr_queue_pairs);
4719
4720 data = ctx->data + num_rx;
4721
4722 stats_sum_queue(sum: data, num: num_tx, q_value: first_tx_q, q_num: vi->curr_queue_pairs);
4723}
4724
4725static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid,
4726 struct virtnet_stats_ctx *ctx,
4727 const u8 *base, bool drv_stats, u8 reply_type)
4728{
4729 const struct virtnet_stat_desc *desc;
4730 const u64_stats_t *v_stat;
4731 u64 offset, bitmap;
4732 const __le64 *v;
4733 u32 queue_type;
4734 int i, num;
4735
4736 queue_type = vq_type(vi, qid);
4737 bitmap = ctx->bitmap[queue_type];
4738
4739 if (drv_stats) {
4740 if (queue_type == VIRTNET_Q_TYPE_RX) {
4741 desc = &virtnet_rq_stats_desc_qstat[0];
4742 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
4743 } else {
4744 desc = &virtnet_sq_stats_desc_qstat[0];
4745 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);
4746 }
4747
4748 for (i = 0; i < num; ++i) {
4749 offset = desc[i].qstat_offset / sizeof(*ctx->data);
4750 v_stat = (const u64_stats_t *)(base + desc[i].offset);
4751 ctx->data[offset] = u64_stats_read(p: v_stat);
4752 }
4753 return;
4754 }
4755
4756 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4757 desc = &virtnet_stats_rx_basic_desc_qstat[0];
4758 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
4759 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
4760 goto found;
4761 }
4762
4763 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4764 desc = &virtnet_stats_rx_csum_desc_qstat[0];
4765 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
4766 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
4767 goto found;
4768 }
4769
4770 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
4771 desc = &virtnet_stats_rx_gso_desc_qstat[0];
4772 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
4773 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO)
4774 goto found;
4775 }
4776
4777 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4778 desc = &virtnet_stats_rx_speed_desc_qstat[0];
4779 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
4780 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
4781 goto found;
4782 }
4783
4784 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4785 desc = &virtnet_stats_tx_basic_desc_qstat[0];
4786 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
4787 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
4788 goto found;
4789 }
4790
4791 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
4792 desc = &virtnet_stats_tx_csum_desc_qstat[0];
4793 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
4794 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM)
4795 goto found;
4796 }
4797
4798 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4799 desc = &virtnet_stats_tx_gso_desc_qstat[0];
4800 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
4801 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
4802 goto found;
4803 }
4804
4805 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4806 desc = &virtnet_stats_tx_speed_desc_qstat[0];
4807 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
4808 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
4809 goto found;
4810 }
4811
4812 return;
4813
4814found:
4815 for (i = 0; i < num; ++i) {
4816 offset = desc[i].qstat_offset / sizeof(*ctx->data);
4817 v = (const __le64 *)(base + desc[i].offset);
4818 ctx->data[offset] = le64_to_cpu(*v);
4819 }
4820}
4821
4822/* virtnet_fill_stats - copy the stats to qstats or ethtool -S
4823 * The stats source is the device or the driver.
4824 *
4825 * @vi: virtio net info
4826 * @qid: the vq id
4827 * @ctx: stats ctx (initialized by virtnet_stats_ctx_init())
4828 * @base: pointer to the device reply or the driver stats structure
4829 * @drv_stats: designates the base type (true: driver stats, false: device reply)
4830 * @reply_type: the type of the device reply (must be zero if drv_stats is true)
4831 */
4832static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid,
4833 struct virtnet_stats_ctx *ctx,
4834 const u8 *base, bool drv_stats, u8 reply_type)
4835{
4836 u32 queue_type, num_rx, num_tx, num_cq;
4837 const struct virtnet_stat_desc *desc;
4838 const u64_stats_t *v_stat;
4839 u64 offset, bitmap;
4840 const __le64 *v;
4841 int i, num;
4842
4843 if (ctx->to_qstat)
4844 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type);
4845
4846 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
4847 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
4848 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];
4849
4850 queue_type = vq_type(vi, qid);
4851 bitmap = ctx->bitmap[queue_type];
4852
4853 /* skip the total fields of pairs */
4854 offset = num_rx + num_tx;
4855
4856 if (queue_type == VIRTNET_Q_TYPE_TX) {
4857 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2);
4858
4859 num = ARRAY_SIZE(virtnet_sq_stats_desc);
4860 if (drv_stats) {
4861 desc = &virtnet_sq_stats_desc[0];
4862 goto drv_stats;
4863 }
4864
4865 offset += num;
4866
4867 } else if (queue_type == VIRTNET_Q_TYPE_RX) {
4868 offset += num_cq + num_rx * (qid / 2);
4869
4870 num = ARRAY_SIZE(virtnet_rq_stats_desc);
4871 if (drv_stats) {
4872 desc = &virtnet_rq_stats_desc[0];
4873 goto drv_stats;
4874 }
4875
4876 offset += num;
4877 }
4878
4879 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) {
4880 desc = &virtnet_stats_cvq_desc[0];
4881 num = ARRAY_SIZE(virtnet_stats_cvq_desc);
4882 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ)
4883 goto found;
4884
4885 offset += num;
4886 }
4887
4888 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4889 desc = &virtnet_stats_rx_basic_desc[0];
4890 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);
4891 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
4892 goto found;
4893
4894 offset += num;
4895 }
4896
4897 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4898 desc = &virtnet_stats_rx_csum_desc[0];
4899 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);
4900 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
4901 goto found;
4902
4903 offset += num;
4904 }
4905
4906 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4907 desc = &virtnet_stats_rx_speed_desc[0];
4908 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);
4909 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
4910 goto found;
4911
4912 offset += num;
4913 }
4914
4915 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4916 desc = &virtnet_stats_tx_basic_desc[0];
4917 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);
4918 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
4919 goto found;
4920
4921 offset += num;
4922 }
4923
4924 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4925 desc = &virtnet_stats_tx_gso_desc[0];
4926 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);
4927 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
4928 goto found;
4929
4930 offset += num;
4931 }
4932
4933 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4934 desc = &virtnet_stats_tx_speed_desc[0];
4935 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);
4936 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
4937 goto found;
4938
4939 offset += num;
4940 }
4941
4942 return;
4943
4944found:
4945 for (i = 0; i < num; ++i) {
4946 v = (const __le64 *)(base + desc[i].offset);
4947 ctx->data[offset + i] = le64_to_cpu(*v);
4948 }
4949
4950 return;
4951
4952drv_stats:
4953 for (i = 0; i < num; ++i) {
4954 v_stat = (const u64_stats_t *)(base + desc[i].offset);
4955 ctx->data[offset + i] = u64_stats_read(p: v_stat);
4956 }
4957}
4958
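/* Send a VIRTIO_NET_CTRL_STATS_GET request and parse the reply: each per-vq
 * block is identified by its header and copied into the output buffer via
 * virtnet_fill_stats().
 */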
4959static int __virtnet_get_hw_stats(struct virtnet_info *vi,
4960 struct virtnet_stats_ctx *ctx,
4961 struct virtio_net_ctrl_queue_stats *req,
4962 int req_size, void *reply, int res_size)
4963{
4964 struct virtio_net_stats_reply_hdr *hdr;
4965 struct scatterlist sgs_in, sgs_out;
4966 void *p;
4967 u32 qid;
4968 int ok;
4969
4970 sg_init_one(&sgs_out, req, req_size);
4971 sg_init_one(&sgs_in, reply, res_size);
4972
4973 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
4974 VIRTIO_NET_CTRL_STATS_GET,
4975 out: &sgs_out, in: &sgs_in);
4976
4977 if (!ok)
4978 return ok;
4979
4980 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) {
4981 hdr = p;
4982 qid = le16_to_cpu(hdr->vq_index);
4983 virtnet_fill_stats(vi, qid, ctx, base: p, drv_stats: false, reply_type: hdr->type);
4984 }
4985
4986 return 0;
4987}
4988
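/* Append one queue-stats request entry for @qid if its queue type has any
 * supported device stats.
 */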
4989static void virtnet_make_stat_req(struct virtnet_info *vi,
4990 struct virtnet_stats_ctx *ctx,
4991 struct virtio_net_ctrl_queue_stats *req,
4992 int qid, int *idx)
4993{
4994 int qtype = vq_type(vi, qid);
4995 u64 bitmap = ctx->bitmap[qtype];
4996
4997 if (!bitmap)
4998 return;
4999
5000 req->stats[*idx].vq_index = cpu_to_le16(qid);
5001 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap);
5002 *idx += 1;
5003}
5004
5005/* qid == -1: get the stats of all vqs.
5006 * qid >= 0: get the stats of the specified vq. This must not be the cvq.
5007 */
5008static int virtnet_get_hw_stats(struct virtnet_info *vi,
5009 struct virtnet_stats_ctx *ctx, int qid)
5010{
5011 int qnum, i, j, res_size, qtype, last_vq, first_vq;
5012 struct virtio_net_ctrl_queue_stats *req;
5013 bool enable_cvq;
5014 void *reply;
5015 int ok;
5016
5017 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
5018 return 0;
5019
5020 if (qid == -1) {
5021 last_vq = vi->curr_queue_pairs * 2 - 1;
5022 first_vq = 0;
5023 enable_cvq = true;
5024 } else {
5025 last_vq = qid;
5026 first_vq = qid;
5027 enable_cvq = false;
5028 }
5029
5030 qnum = 0;
5031 res_size = 0;
5032 for (i = first_vq; i <= last_vq ; ++i) {
5033 qtype = vq_type(vi, qid: i);
5034 if (ctx->bitmap[qtype]) {
5035 ++qnum;
5036 res_size += ctx->size[qtype];
5037 }
5038 }
5039
5040 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) {
5041 res_size += ctx->size[VIRTNET_Q_TYPE_CQ];
5042 qnum += 1;
5043 }
5044
5045 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL);
5046 if (!req)
5047 return -ENOMEM;
5048
5049 reply = kmalloc(res_size, GFP_KERNEL);
5050 if (!reply) {
5051 kfree(objp: req);
5052 return -ENOMEM;
5053 }
5054
5055 j = 0;
5056 for (i = first_vq; i <= last_vq ; ++i)
5057 virtnet_make_stat_req(vi, ctx, req, qid: i, idx: &j);
5058
5059 if (enable_cvq)
5060 virtnet_make_stat_req(vi, ctx, req, qid: vi->max_queue_pairs * 2, idx: &j);
5061
5062 ok = __virtnet_get_hw_stats(vi, ctx, req, req_size: sizeof(*req) * j, reply, res_size);
5063
5064 kfree(objp: req);
5065 kfree(objp: reply);
5066
5067 return ok;
5068}
5069
5070static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
5071{
5072 struct virtnet_info *vi = netdev_priv(dev);
5073 unsigned int i;
5074 u8 *p = data;
5075
5076 switch (stringset) {
5077 case ETH_SS_STATS:
5078 /* Generate the total field names. */
5079 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, qid: -1, data: &p);
5080 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, qid: -1, data: &p);
5081
5082 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, qid: 0, data: &p);
5083
5084 for (i = 0; i < vi->curr_queue_pairs; ++i)
5085 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, qid: i, data: &p);
5086
5087 for (i = 0; i < vi->curr_queue_pairs; ++i)
5088 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, qid: i, data: &p);
5089 break;
5090 }
5091}
5092
5093static int virtnet_get_sset_count(struct net_device *dev, int sset)
5094{
5095 struct virtnet_info *vi = netdev_priv(dev);
5096 struct virtnet_stats_ctx ctx = {0};
5097 u32 pair_count;
5098
5099 switch (sset) {
5100 case ETH_SS_STATS:
5101 virtnet_stats_ctx_init(vi, ctx: &ctx, NULL, to_qstat: false);
5102
5103 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX];
5104
5105 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] +
5106 vi->curr_queue_pairs * pair_count;
5107 default:
5108 return -EOPNOTSUPP;
5109 }
5110}
5111
5112static void virtnet_get_ethtool_stats(struct net_device *dev,
5113 struct ethtool_stats *stats, u64 *data)
5114{
5115 struct virtnet_info *vi = netdev_priv(dev);
5116 struct virtnet_stats_ctx ctx = {0};
5117 unsigned int start, i;
5118 const u8 *stats_base;
5119
5120 virtnet_stats_ctx_init(vi, ctx: &ctx, data, to_qstat: false);
5121 if (virtnet_get_hw_stats(vi, ctx: &ctx, qid: -1))
5122 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n");
5123
5124 for (i = 0; i < vi->curr_queue_pairs; i++) {
5125 struct receive_queue *rq = &vi->rq[i];
5126 struct send_queue *sq = &vi->sq[i];
5127
5128 stats_base = (const u8 *)&rq->stats;
5129 do {
5130 start = u64_stats_fetch_begin(syncp: &rq->stats.syncp);
5131 virtnet_fill_stats(vi, qid: i * 2, ctx: &ctx, base: stats_base, drv_stats: true, reply_type: 0);
5132 } while (u64_stats_fetch_retry(syncp: &rq->stats.syncp, start));
5133
5134 stats_base = (const u8 *)&sq->stats;
5135 do {
5136 start = u64_stats_fetch_begin(syncp: &sq->stats.syncp);
5137 virtnet_fill_stats(vi, qid: i * 2 + 1, ctx: &ctx, base: stats_base, drv_stats: true, reply_type: 0);
5138 } while (u64_stats_fetch_retry(syncp: &sq->stats.syncp, start));
5139 }
5140
5141 virtnet_fill_total_fields(vi, ctx: &ctx);
5142}
5143
5144static void virtnet_get_channels(struct net_device *dev,
5145 struct ethtool_channels *channels)
5146{
5147 struct virtnet_info *vi = netdev_priv(dev);
5148
5149 channels->combined_count = vi->curr_queue_pairs;
5150 channels->max_combined = vi->max_queue_pairs;
5151 channels->max_other = 0;
5152 channels->rx_count = 0;
5153 channels->tx_count = 0;
5154 channels->other_count = 0;
5155}
5156
5157static int virtnet_set_link_ksettings(struct net_device *dev,
5158 const struct ethtool_link_ksettings *cmd)
5159{
5160 struct virtnet_info *vi = netdev_priv(dev);
5161
5162 return ethtool_virtdev_set_link_ksettings(dev, cmd,
5163 dev_speed: &vi->speed, dev_duplex: &vi->duplex);
5164}
5165
5166static int virtnet_get_link_ksettings(struct net_device *dev,
5167 struct ethtool_link_ksettings *cmd)
5168{
5169 struct virtnet_info *vi = netdev_priv(dev);
5170
5171 cmd->base.speed = vi->speed;
5172 cmd->base.duplex = vi->duplex;
5173 cmd->base.port = PORT_OTHER;
5174
5175 return 0;
5176}
5177
5178static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi,
5179 struct ethtool_coalesce *ec)
5180{
5181 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL;
5182 struct scatterlist sgs_tx;
5183 int i;
5184
5185 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL);
5186 if (!coal_tx)
5187 return -ENOMEM;
5188
5189 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
5190 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
5191 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx));
5192
5193 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
5194 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
5195 out: &sgs_tx))
5196 return -EINVAL;
5197
5198 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs;
5199 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames;
5200 for (i = 0; i < vi->max_queue_pairs; i++) {
5201 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs;
5202 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames;
5203 }
5204
5205 return 0;
5206}
5207
5208static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
5209 struct ethtool_coalesce *ec)
5210{
5211 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL;
5212 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
5213 struct scatterlist sgs_rx;
5214 int i;
5215
5216 if (rx_ctrl_dim_on && !virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
5217 return -EOPNOTSUPP;
5218
5219 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs ||
5220 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets))
5221 return -EINVAL;
5222
5223 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) {
5224 vi->rx_dim_enabled = true;
5225 for (i = 0; i < vi->max_queue_pairs; i++) {
5226 mutex_lock(lock: &vi->rq[i].dim_lock);
5227 vi->rq[i].dim_enabled = true;
5228 mutex_unlock(lock: &vi->rq[i].dim_lock);
5229 }
5230 return 0;
5231 }
5232
5233 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL);
5234 if (!coal_rx)
5235 return -ENOMEM;
5236
5237 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) {
5238 vi->rx_dim_enabled = false;
5239 for (i = 0; i < vi->max_queue_pairs; i++) {
5240 mutex_lock(lock: &vi->rq[i].dim_lock);
5241 vi->rq[i].dim_enabled = false;
5242 mutex_unlock(lock: &vi->rq[i].dim_lock);
5243 }
5244 }
5245
5246 /* Since the per-queue coalescing params can be set individually,
5247 * we need to apply the new global params even if they
5248 * have not changed.
5249 */
5250 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
5251 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
5252 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx));
5253
5254 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
5255 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
5256 out: &sgs_rx))
5257 return -EINVAL;
5258
5259 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
5260 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
5261 for (i = 0; i < vi->max_queue_pairs; i++) {
5262 mutex_lock(lock: &vi->rq[i].dim_lock);
5263 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
5264 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
5265 mutex_unlock(lock: &vi->rq[i].dim_lock);
5266 }
5267
5268 return 0;
5269}
5270
5271static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
5272 struct ethtool_coalesce *ec)
5273{
5274 int err;
5275
5276 err = virtnet_send_tx_notf_coal_cmds(vi, ec);
5277 if (err)
5278 return err;
5279
5280 err = virtnet_send_rx_notf_coal_cmds(vi, ec);
5281 if (err)
5282 return err;
5283
5284 return 0;
5285}
5286
5287static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi,
5288 struct ethtool_coalesce *ec,
5289 u16 queue)
5290{
5291 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
5292 u32 max_usecs, max_packets;
5293 bool cur_rx_dim;
5294 int err;
5295
5296 mutex_lock(lock: &vi->rq[queue].dim_lock);
5297 cur_rx_dim = vi->rq[queue].dim_enabled;
5298 max_usecs = vi->rq[queue].intr_coal.max_usecs;
5299 max_packets = vi->rq[queue].intr_coal.max_packets;
5300
5301 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs ||
5302 ec->rx_max_coalesced_frames != max_packets)) {
5303 mutex_unlock(lock: &vi->rq[queue].dim_lock);
5304 return -EINVAL;
5305 }
5306
5307 if (rx_ctrl_dim_on && !cur_rx_dim) {
5308 vi->rq[queue].dim_enabled = true;
5309 mutex_unlock(lock: &vi->rq[queue].dim_lock);
5310 return 0;
5311 }
5312
5313 if (!rx_ctrl_dim_on && cur_rx_dim)
5314 vi->rq[queue].dim_enabled = false;
5315
5316 /* If no params are updated, userspace ethtool will
5317 * reject the modification.
5318 */
5319 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue,
5320 max_usecs: ec->rx_coalesce_usecs,
5321 max_packets: ec->rx_max_coalesced_frames);
5322 mutex_unlock(lock: &vi->rq[queue].dim_lock);
5323 return err;
5324}
5325
5326static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
5327 struct ethtool_coalesce *ec,
5328 u16 queue)
5329{
5330 int err;
5331
5332 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue);
5333 if (err)
5334 return err;
5335
5336 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue,
5337 max_usecs: ec->tx_coalesce_usecs,
5338 max_packets: ec->tx_max_coalesced_frames);
5339 if (err)
5340 return err;
5341
5342 return 0;
5343}
5344
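/* DIM worker: if adaptive RX coalescing is enabled for this queue, apply the
 * moderation values suggested by net_dim via the ctrl vq.
 */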
5345static void virtnet_rx_dim_work(struct work_struct *work)
5346{
5347 struct dim *dim = container_of(work, struct dim, work);
5348 struct receive_queue *rq = container_of(dim,
5349 struct receive_queue, dim);
5350 struct virtnet_info *vi = rq->vq->vdev->priv;
5351 struct net_device *dev = vi->dev;
5352 struct dim_cq_moder update_moder;
5353 int qnum, err;
5354
5355 qnum = rq - vi->rq;
5356
5357 mutex_lock(lock: &rq->dim_lock);
5358 if (!rq->dim_enabled)
5359 goto out;
5360
5361 update_moder = net_dim_get_rx_irq_moder(dev, dim);
5362 if (update_moder.usec != rq->intr_coal.max_usecs ||
5363 update_moder.pkts != rq->intr_coal.max_packets) {
5364 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue: qnum,
5365 max_usecs: update_moder.usec,
5366 max_packets: update_moder.pkts);
5367 if (err)
5368 pr_debug("%s: Failed to send dim parameters on rxq%d\n",
5369 dev->name, qnum);
5370 }
5371out:
5372 dim->state = DIM_START_MEASURE;
5373 mutex_unlock(lock: &rq->dim_lock);
5374}
5375
5376static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
5377{
5378 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
5379 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated.
5380 */
5381 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
5382 return -EOPNOTSUPP;
5383
5384 if (ec->tx_max_coalesced_frames > 1 ||
5385 ec->rx_max_coalesced_frames != 1)
5386 return -EINVAL;
5387
5388 return 0;
5389}
5390
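/* The NAPI weight may only be changed while the interface is down; refuse
 * with -EBUSY if the device is up and the requested weight differs.
 */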
5391static int virtnet_should_update_vq_weight(int dev_flags, int weight,
5392 int vq_weight, bool *should_update)
5393{
5394 if (weight ^ vq_weight) {
5395 if (dev_flags & IFF_UP)
5396 return -EBUSY;
5397 *should_update = true;
5398 }
5399
5400 return 0;
5401}
5402
5403static int virtnet_set_coalesce(struct net_device *dev,
5404 struct ethtool_coalesce *ec,
5405 struct kernel_ethtool_coalesce *kernel_coal,
5406 struct netlink_ext_ack *extack)
5407{
5408 struct virtnet_info *vi = netdev_priv(dev);
5409 int ret, queue_number, napi_weight, i;
5410 bool update_napi = false;
5411
5412 /* Can't change NAPI weight if the link is up */
5413 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
5414 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) {
5415 ret = virtnet_should_update_vq_weight(dev_flags: dev->flags, weight: napi_weight,
5416 vq_weight: vi->sq[queue_number].napi.weight,
5417 should_update: &update_napi);
5418 if (ret)
5419 return ret;
5420
5421 if (update_napi) {
5422 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be
5423 * updated for the sake of simplicity, which might not be necessary
5424 */
5425 break;
5426 }
5427 }
5428
5429 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_NOTF_COAL))
5430 ret = virtnet_send_notf_coal_cmds(vi, ec);
5431 else
5432 ret = virtnet_coal_params_supported(ec);
5433
5434 if (ret)
5435 return ret;
5436
5437 if (update_napi) {
5438 /* xsk xmit depends on the tx napi. So if xsk is active,
5439 * prevent modifications to tx napi.
5440 */
5441 for (i = queue_number; i < vi->max_queue_pairs; i++) {
5442 if (vi->sq[i].xsk_pool)
5443 return -EBUSY;
5444 }
5445
5446 for (; queue_number < vi->max_queue_pairs; queue_number++)
5447 vi->sq[queue_number].napi.weight = napi_weight;
5448 }
5449
5450 return ret;
5451}
5452
5453static int virtnet_get_coalesce(struct net_device *dev,
5454 struct ethtool_coalesce *ec,
5455 struct kernel_ethtool_coalesce *kernel_coal,
5456 struct netlink_ext_ack *extack)
5457{
5458 struct virtnet_info *vi = netdev_priv(dev);
5459
5460 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
5461 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs;
5462 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs;
5463 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets;
5464 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets;
5465 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled;
5466 } else {
5467 ec->rx_max_coalesced_frames = 1;
5468
5469 if (vi->sq[0].napi.weight)
5470 ec->tx_max_coalesced_frames = 1;
5471 }
5472
5473 return 0;
5474}
5475
5476static int virtnet_set_per_queue_coalesce(struct net_device *dev,
5477 u32 queue,
5478 struct ethtool_coalesce *ec)
5479{
5480 struct virtnet_info *vi = netdev_priv(dev);
5481 int ret, napi_weight;
5482 bool update_napi = false;
5483
5484 if (queue >= vi->max_queue_pairs)
5485 return -EINVAL;
5486
5487 /* Can't change NAPI weight if the link is up */
5488 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
5489 ret = virtnet_should_update_vq_weight(dev_flags: dev->flags, weight: napi_weight,
5490 vq_weight: vi->sq[queue].napi.weight,
5491 should_update: &update_napi);
5492 if (ret)
5493 return ret;
5494
5495 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
5496 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue);
5497 else
5498 ret = virtnet_coal_params_supported(ec);
5499
5500 if (ret)
5501 return ret;
5502
5503 if (update_napi)
5504 vi->sq[queue].napi.weight = napi_weight;
5505
5506 return 0;
5507}
5508
5509static int virtnet_get_per_queue_coalesce(struct net_device *dev,
5510 u32 queue,
5511 struct ethtool_coalesce *ec)
5512{
5513 struct virtnet_info *vi = netdev_priv(dev);
5514
5515 if (queue >= vi->max_queue_pairs)
5516 return -EINVAL;
5517
5518 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
5519 mutex_lock(lock: &vi->rq[queue].dim_lock);
5520 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs;
5521 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs;
5522 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets;
5523 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets;
5524 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled;
5525 mutex_unlock(lock: &vi->rq[queue].dim_lock);
5526 } else {
5527 ec->rx_max_coalesced_frames = 1;
5528
5529 if (vi->sq[queue].napi.weight)
5530 ec->tx_max_coalesced_frames = 1;
5531 }
5532
5533 return 0;
5534}
5535
5536static void virtnet_init_settings(struct net_device *dev)
5537{
5538 struct virtnet_info *vi = netdev_priv(dev);
5539
5540 vi->speed = SPEED_UNKNOWN;
5541 vi->duplex = DUPLEX_UNKNOWN;
5542}
5543
5544static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
5545{
5546 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
5547}
5548
5549static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
5550{
5551 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
5552}
5553
5554static int virtnet_get_rxfh(struct net_device *dev,
5555 struct ethtool_rxfh_param *rxfh)
5556{
5557 struct virtnet_info *vi = netdev_priv(dev);
5558 int i;
5559
5560 if (rxfh->indir) {
5561 for (i = 0; i < vi->rss_indir_table_size; ++i)
5562 rxfh->indir[i] = le16_to_cpu(vi->rss_hdr->indirection_table[i]);
5563 }
5564
5565 if (rxfh->key)
5566 memcpy(to: rxfh->key, from: vi->rss_hash_key_data, len: vi->rss_key_size);
5567
5568 rxfh->hfunc = ETH_RSS_HASH_TOP;
5569
5570 return 0;
5571}
5572
5573static int virtnet_set_rxfh(struct net_device *dev,
5574 struct ethtool_rxfh_param *rxfh,
5575 struct netlink_ext_ack *extack)
5576{
5577 struct virtnet_info *vi = netdev_priv(dev);
5578 bool update = false;
5579 int i;
5580
5581 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
5582 rxfh->hfunc != ETH_RSS_HASH_TOP)
5583 return -EOPNOTSUPP;
5584
5585 if (rxfh->indir) {
5586 if (!vi->has_rss)
5587 return -EOPNOTSUPP;
5588
5589 for (i = 0; i < vi->rss_indir_table_size; ++i)
5590 vi->rss_hdr->indirection_table[i] = cpu_to_le16(rxfh->indir[i]);
5591 update = true;
5592 }
5593
5594 if (rxfh->key) {
5595 /* If either _F_HASH_REPORT or _F_RSS is negotiated, the
5596 * device provides hash calculation capabilities, that is,
5597 * the hash key can be configured.
5598 */
5599 if (!vi->has_rss && !vi->has_rss_hash_report)
5600 return -EOPNOTSUPP;
5601
5602 memcpy(to: vi->rss_hash_key_data, from: rxfh->key, len: vi->rss_key_size);
5603 update = true;
5604 }
5605
5606 if (update)
5607 virtnet_commit_rss_command(vi);
5608
5609 return 0;
5610}
5611
5612static u32 virtnet_get_rx_ring_count(struct net_device *dev)
5613{
5614 struct virtnet_info *vi = netdev_priv(dev);
5615
5616 return vi->curr_queue_pairs;
5617}
5618
5619static const struct ethtool_ops virtnet_ethtool_ops = {
5620 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
5621 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
5622 .get_drvinfo = virtnet_get_drvinfo,
5623 .get_link = ethtool_op_get_link,
5624 .get_ringparam = virtnet_get_ringparam,
5625 .set_ringparam = virtnet_set_ringparam,
5626 .get_strings = virtnet_get_strings,
5627 .get_sset_count = virtnet_get_sset_count,
5628 .get_ethtool_stats = virtnet_get_ethtool_stats,
5629 .set_channels = virtnet_set_channels,
5630 .get_channels = virtnet_get_channels,
5631 .get_ts_info = ethtool_op_get_ts_info,
5632 .get_link_ksettings = virtnet_get_link_ksettings,
5633 .set_link_ksettings = virtnet_set_link_ksettings,
5634 .set_coalesce = virtnet_set_coalesce,
5635 .get_coalesce = virtnet_get_coalesce,
5636 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce,
5637 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce,
5638 .get_rxfh_key_size = virtnet_get_rxfh_key_size,
5639 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size,
5640 .get_rxfh = virtnet_get_rxfh,
5641 .set_rxfh = virtnet_set_rxfh,
5642 .get_rxfh_fields = virtnet_get_hashflow,
5643 .set_rxfh_fields = virtnet_set_hashflow,
5644 .get_rx_ring_count = virtnet_get_rx_ring_count,
5645};
5646
5647static void virtnet_get_queue_stats_rx(struct net_device *dev, int i,
5648 struct netdev_queue_stats_rx *stats)
5649{
5650 struct virtnet_info *vi = netdev_priv(dev);
5651 struct receive_queue *rq = &vi->rq[i];
5652 struct virtnet_stats_ctx ctx = {0};
5653
	virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);

	virtnet_get_hw_stats(vi, &ctx, i * 2);
	virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0);
5658}
5659
5660static void virtnet_get_queue_stats_tx(struct net_device *dev, int i,
5661 struct netdev_queue_stats_tx *stats)
5662{
5663 struct virtnet_info *vi = netdev_priv(dev);
5664 struct send_queue *sq = &vi->sq[i];
5665 struct virtnet_stats_ctx ctx = {0};
5666
	virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);

	virtnet_get_hw_stats(vi, &ctx, i * 2 + 1);
	virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0);
5671}
5672
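/* Base stats for the netdev qstats API. Device counters are never reset, so
 * every supported field starts at zero; netdev_stat_queue_sum() then folds in
 * the queues above real_num_{rx,tx}_queues so totals stay consistent when the
 * channel count is reduced.
 */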
5673static void virtnet_get_base_stats(struct net_device *dev,
5674 struct netdev_queue_stats_rx *rx,
5675 struct netdev_queue_stats_tx *tx)
5676{
5677 struct virtnet_info *vi = netdev_priv(dev);
5678
	/* The queue stats of a virtio-net device are never reset, so the
	 * base values reported here are all zero.
	 */
5682 rx->bytes = 0;
5683 rx->packets = 0;
5684
5685 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
5686 rx->hw_drops = 0;
5687 rx->hw_drop_overruns = 0;
5688 }
5689
5690 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
5691 rx->csum_unnecessary = 0;
5692 rx->csum_none = 0;
5693 rx->csum_bad = 0;
5694 }
5695
5696 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
5697 rx->hw_gro_packets = 0;
5698 rx->hw_gro_bytes = 0;
5699 rx->hw_gro_wire_packets = 0;
5700 rx->hw_gro_wire_bytes = 0;
5701 }
5702
5703 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED)
5704 rx->hw_drop_ratelimits = 0;
5705
5706 tx->bytes = 0;
5707 tx->packets = 0;
5708 tx->stop = 0;
5709 tx->wake = 0;
5710
5711 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
5712 tx->hw_drops = 0;
5713 tx->hw_drop_errors = 0;
5714 }
5715
5716 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
5717 tx->csum_none = 0;
5718 tx->needs_csum = 0;
5719 }
5720
5721 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
5722 tx->hw_gso_packets = 0;
5723 tx->hw_gso_bytes = 0;
5724 tx->hw_gso_wire_packets = 0;
5725 tx->hw_gso_wire_bytes = 0;
5726 }
5727
5728 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED)
5729 tx->hw_drop_ratelimits = 0;
5730
	netdev_stat_queue_sum(dev,
			      dev->real_num_rx_queues, vi->max_queue_pairs, rx,
			      dev->real_num_tx_queues, vi->max_queue_pairs, tx);
5734}
5735
5736static const struct netdev_stat_ops virtnet_stat_ops = {
5737 .get_queue_stats_rx = virtnet_get_queue_stats_rx,
5738 .get_queue_stats_tx = virtnet_get_queue_stats_tx,
5739 .get_base_stats = virtnet_get_base_stats,
5740};
5741
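/* Quiesce the device before suspend: flush the config and rx-mode work,
 * close the interface if it is running and detach it from the stack.
 */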
5742static void virtnet_freeze_down(struct virtio_device *vdev)
5743{
5744 struct virtnet_info *vi = vdev->priv;
5745
5746 /* Make sure no work handler is accessing the device */
	flush_work(&vi->config_work);
	disable_rx_mode_work(vi);
	flush_work(&vi->rx_mode_work);

	if (netif_running(vi->dev)) {
		rtnl_lock();
		virtnet_close(vi->dev);
		rtnl_unlock();
	}

	netif_tx_lock_bh(vi->dev);
	netif_device_detach(vi->dev);
	netif_tx_unlock_bh(vi->dev);
5760}
5761
5762static int init_vqs(struct virtnet_info *vi);
5763
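/* Rebuild the virtqueues and bring the interface back up after restore. */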
5764static int virtnet_restore_up(struct virtio_device *vdev)
5765{
5766 struct virtnet_info *vi = vdev->priv;
5767 int err;
5768
5769 err = init_vqs(vi);
5770 if (err)
5771 return err;
5772
	virtio_device_ready(vdev);

	enable_delayed_refill(vi);
	enable_rx_mode_work(vi);

	if (netif_running(vi->dev)) {
		rtnl_lock();
		err = virtnet_open(vi->dev);
		rtnl_unlock();
		if (err)
			return err;
	}

	netif_tx_lock_bh(vi->dev);
	netif_device_attach(vi->dev);
	netif_tx_unlock_bh(vi->dev);
5789 return err;
5790}
5791
5792static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
5793{
5794 __virtio64 *_offloads __free(kfree) = NULL;
5795 struct scatterlist sg;
5796
5797 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL);
5798 if (!_offloads)
5799 return -ENOMEM;
5800
	*_offloads = cpu_to_virtio64(vi->vdev, offloads);

	sg_init_one(&sg, _offloads, sizeof(*_offloads));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
				  VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
		dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
		return -EINVAL;
5809 }
5810
5811 return 0;
5812}
5813
5814static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
5815{
5816 u64 offloads = 0;
5817
5818 if (!vi->guest_offloads)
5819 return 0;
5820
5821 return virtnet_set_guest_offloads(vi, offloads);
5822}
5823
5824static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
5825{
5826 u64 offloads = vi->guest_offloads;
5827
5828 if (!vi->guest_offloads)
5829 return 0;
5830
5831 return virtnet_set_guest_offloads(vi, offloads);
5832}
5833
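/* Attach (pool != NULL) or detach (pool == NULL) an XSK buffer pool on an RX
 * queue: pause the queue, reset its virtqueue so pending buffers are
 * recycled, then resume it with the new pool in place.
 */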
5834static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq,
5835 struct xsk_buff_pool *pool)
5836{
5837 int err, qindex;
5838
5839 qindex = rq - vi->rq;
5840
5841 if (pool) {
		err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id);
		if (err < 0)
			return err;

		err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info,
						 MEM_TYPE_XSK_BUFF_POOL, NULL);
		if (err < 0)
			goto unreg;

		xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info);
	}

	virtnet_rx_pause(vi, rq);

	err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf, NULL);
	if (err) {
		netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err);
5859
5860 pool = NULL;
5861 }
5862
5863 rq->xsk_pool = pool;
5864
5865 virtnet_rx_resume(vi, rq);
5866
5867 if (pool)
5868 return 0;
5869
5870unreg:
	xdp_rxq_info_unreg(&rq->xsk_rxq_info);
5872 return err;
5873}
5874
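/* TX counterpart of virtnet_rq_bind_xsk_pool(). */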
5875static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi,
5876 struct send_queue *sq,
5877 struct xsk_buff_pool *pool)
5878{
5879 int err, qindex;
5880
5881 qindex = sq - vi->sq;
5882
5883 virtnet_tx_pause(vi, sq);
5884
	err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf,
			      virtnet_sq_free_unused_buf_done);
	if (err) {
		netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err);
5889 pool = NULL;
5890 }
5891
5892 sq->xsk_pool = pool;
5893
5894 virtnet_tx_resume(vi, sq);
5895
5896 return err;
5897}
5898
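/* AF_XDP zero-copy setup for queue pair @qid: validate headroom and DMA
 * device constraints, DMA-map the pool and a shared all-zero TX header, and
 * bind the pool to both the RX and TX queues.
 */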
5899static int virtnet_xsk_pool_enable(struct net_device *dev,
5900 struct xsk_buff_pool *pool,
5901 u16 qid)
5902{
5903 struct virtnet_info *vi = netdev_priv(dev);
5904 struct receive_queue *rq;
5905 struct device *dma_dev;
5906 struct send_queue *sq;
5907 dma_addr_t hdr_dma;
5908 int err, size;
5909
5910 if (vi->hdr_len > xsk_pool_get_headroom(pool))
5911 return -EINVAL;
5912
	/* In big_packets mode (i.e. without mergeable RX buffers) XDP cannot
	 * work, so there is no need to set up XSK for the rq.
	 */
5916 if (vi->big_packets && !vi->mergeable_rx_bufs)
5917 return -ENOENT;
5918
5919 if (qid >= vi->curr_queue_pairs)
5920 return -EINVAL;
5921
5922 sq = &vi->sq[qid];
5923 rq = &vi->rq[qid];
5924
	/* XSK assumes that RX and TX share one DMA device: AF_XDP may receive
	 * into a buffer and later reuse that same buffer for transmit, so the
	 * DMA device of the sq and the rq must be the same.
	 *
	 * However, vq->dma_dev allows each virtqueue to have its own DMA
	 * device, so verify that the rq and sq really do share one.
	 */
	if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq))
		return -EINVAL;

	dma_dev = virtqueue_dma_dev(rq->vq);
	if (!dma_dev)
		return -EINVAL;

	size = virtqueue_get_vring_size(rq->vq);
5940
5941 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL);
5942 if (!rq->xsk_buffs)
5943 return -ENOMEM;
5944
	hdr_dma = virtqueue_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len,
					     DMA_TO_DEVICE, 0);
	if (virtqueue_map_mapping_error(sq->vq, hdr_dma)) {
		err = -ENOMEM;
		goto err_free_buffs;
	}

	err = xsk_pool_dma_map(pool, dma_dev, 0);
5953 if (err)
5954 goto err_xsk_map;
5955
5956 err = virtnet_rq_bind_xsk_pool(vi, rq, pool);
5957 if (err)
5958 goto err_rq;
5959
5960 err = virtnet_sq_bind_xsk_pool(vi, sq, pool);
5961 if (err)
5962 goto err_sq;
5963
	/* TX offloads (such as TX checksum) are not supported yet, so every
	 * XSK TX virtnet header is all zeros and all TX packets can share a
	 * single header.
	 */
	sq->xsk_hdr_dma_addr = hdr_dma;
5968
5969 return 0;
5970
5971err_sq:
5972 virtnet_rq_bind_xsk_pool(vi, rq, NULL);
5973err_rq:
	xsk_pool_dma_unmap(pool, 0);
err_xsk_map:
	virtqueue_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len,
				     DMA_TO_DEVICE, 0);
err_free_buffs:
	kvfree(rq->xsk_buffs);
5980 return err;
5981}
5982
5983static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid)
5984{
5985 struct virtnet_info *vi = netdev_priv(dev);
5986 struct xsk_buff_pool *pool;
5987 struct receive_queue *rq;
5988 struct send_queue *sq;
5989 int err;
5990
5991 if (qid >= vi->curr_queue_pairs)
5992 return -EINVAL;
5993
5994 sq = &vi->sq[qid];
5995 rq = &vi->rq[qid];
5996
5997 pool = rq->xsk_pool;
5998
5999 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL);
6000 err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL);
6001
	xsk_pool_dma_unmap(pool, 0);

	virtqueue_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr,
				     vi->hdr_len, DMA_TO_DEVICE, 0);
	kvfree(rq->xsk_buffs);
6007
6008 return err;
6009}
6010
6011static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp)
6012{
	if (xdp->xsk.pool)
		return virtnet_xsk_pool_enable(dev, xdp->xsk.pool,
					       xdp->xsk.queue_id);
	else
		return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id);
6018}
6019
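/* Attach or detach an XDP program. Guest offloads (GRO_HW/CSUM) are cleared
 * while a program is installed and restored on removal, and extra TX queues
 * are reserved for XDP_TX when enough queue pairs are available.
 */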
6020static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
6021 struct netlink_ext_ack *extack)
6022{
6023 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
6024 sizeof(struct skb_shared_info));
6025 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
6026 struct virtnet_info *vi = netdev_priv(dev);
6027 struct bpf_prog *old_prog;
6028 u16 xdp_qp = 0, curr_qp;
6029 int i, err;
6030
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
	    && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) {
6039 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
6040 return -EOPNOTSUPP;
6041 }
6042
6043 if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
6044 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
6045 return -EINVAL;
6046 }
6047
6048 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
6049 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
		netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
6051 return -EINVAL;
6052 }
6053
6054 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
6055 if (prog)
6056 xdp_qp = nr_cpu_ids;
6057
6058 /* XDP requires extra queues for XDP_TX */
6059 if (curr_qp + xdp_qp > vi->max_queue_pairs) {
6060 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
6061 curr_qp + xdp_qp, vi->max_queue_pairs);
6062 xdp_qp = 0;
6063 }
6064
6065 old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
6066 if (!prog && !old_prog)
6067 return 0;
6068
6069 if (prog)
		bpf_prog_add(prog, vi->max_queue_pairs - 1);
6071
6072 virtnet_rx_pause_all(vi);
6073
6074 /* Make sure NAPI is not using any XDP TX queues for RX. */
6075 if (netif_running(dev)) {
6076 for (i = 0; i < vi->max_queue_pairs; i++)
			virtnet_napi_tx_disable(&vi->sq[i]);
6078 }
6079
6080 if (!prog) {
6081 for (i = 0; i < vi->max_queue_pairs; i++) {
6082 rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
6083 if (i == 0)
6084 virtnet_restore_guest_offloads(vi);
6085 }
6086 synchronize_net();
6087 }
6088
	err = virtnet_set_queues(vi, curr_qp + xdp_qp);
	if (err)
		goto err;
	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
6093 vi->xdp_queue_pairs = xdp_qp;
6094
6095 if (prog) {
6096 vi->xdp_enabled = true;
6097 for (i = 0; i < vi->max_queue_pairs; i++) {
6098 rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
6099 if (i == 0 && !old_prog)
6100 virtnet_clear_guest_offloads(vi);
6101 }
6102 if (!old_prog)
			xdp_features_set_redirect_target(dev, true);
6104 } else {
6105 xdp_features_clear_redirect_target(dev);
6106 vi->xdp_enabled = false;
6107 }
6108
6109 virtnet_rx_resume_all(vi);
	for (i = 0; i < vi->max_queue_pairs; i++) {
		if (old_prog)
			bpf_prog_put(old_prog);
		if (netif_running(dev))
			virtnet_napi_tx_enable(&vi->sq[i]);
	}
6116
6117 return 0;
6118
6119err:
6120 if (!prog) {
6121 virtnet_clear_guest_offloads(vi);
6122 for (i = 0; i < vi->max_queue_pairs; i++)
6123 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
6124 }
6125
6126 virtnet_rx_resume_all(vi);
	if (netif_running(dev)) {
		for (i = 0; i < vi->max_queue_pairs; i++)
			virtnet_napi_tx_enable(&vi->sq[i]);
	}
	if (prog)
		bpf_prog_sub(prog, vi->max_queue_pairs - 1);
6133 return err;
6134}
6135
6136static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
6137{
6138 switch (xdp->command) {
6139 case XDP_SETUP_PROG:
		return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
6141 case XDP_SETUP_XSK_POOL:
6142 return virtnet_xsk_pool_setup(dev, xdp);
6143 default:
6144 return -EINVAL;
6145 }
6146}
6147
6148static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
6149 size_t len)
6150{
6151 struct virtnet_info *vi = netdev_priv(dev);
6152 int ret;
6153
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
		return -EOPNOTSUPP;

	ret = snprintf(buf, len, "sby");
6158 if (ret >= len)
6159 return -EOPNOTSUPP;
6160
6161 return 0;
6162}
6163
6164static int virtnet_set_features(struct net_device *dev,
6165 netdev_features_t features)
6166{
6167 struct virtnet_info *vi = netdev_priv(dev);
6168 u64 offloads;
6169 int err;
6170
6171 if ((dev->features ^ features) & NETIF_F_GRO_HW) {
6172 if (vi->xdp_enabled)
6173 return -EBUSY;
6174
6175 if (features & NETIF_F_GRO_HW)
6176 offloads = vi->guest_offloads_capable;
6177 else
6178 offloads = vi->guest_offloads_capable &
6179 ~GUEST_OFFLOAD_GRO_HW_MASK;
6180
6181 err = virtnet_set_guest_offloads(vi, offloads);
6182 if (err)
6183 return err;
6184 vi->guest_offloads = offloads;
6185 }
6186
6187 if ((dev->features ^ features) & NETIF_F_RXHASH) {
6188 if (features & NETIF_F_RXHASH)
6189 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved);
6190 else
6191 vi->rss_hdr->hash_types = cpu_to_le32(VIRTIO_NET_HASH_REPORT_NONE);
6192
6193 if (!virtnet_commit_rss_command(vi))
6194 return -EINVAL;
6195 }
6196
6197 return 0;
6198}
6199
6200static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
6201{
6202 struct virtnet_info *priv = netdev_priv(dev);
6203 struct send_queue *sq = &priv->sq[txqueue];
	struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_inc(&sq->stats.tx_timeouts);
	u64_stats_update_end(&sq->stats.syncp);

	netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
		   txqueue, sq->name, sq->vq->index, sq->vq->name,
		   jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
6213}
6214
6215static int virtnet_init_irq_moder(struct virtnet_info *vi)
6216{
6217 u8 profile_flags = 0, coal_flags = 0;
6218 int ret, i;
6219
6220 profile_flags |= DIM_PROFILE_RX;
6221 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS;
	ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags,
				     DIM_CQ_PERIOD_MODE_START_FROM_EQE,
				     0, virtnet_rx_dim_work, NULL);

	if (ret)
		return ret;

	for (i = 0; i < vi->max_queue_pairs; i++)
		net_dim_setting(vi->dev, &vi->rq[i].dim, false);
6231
6232 return 0;
6233}
6234
6235static void virtnet_free_irq_moder(struct virtnet_info *vi)
6236{
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
		return;

	rtnl_lock();
	net_dim_free_irq_moder(vi->dev);
6242 rtnl_unlock();
6243}
6244
6245static const struct net_device_ops virtnet_netdev = {
6246 .ndo_open = virtnet_open,
6247 .ndo_stop = virtnet_close,
6248 .ndo_start_xmit = start_xmit,
6249 .ndo_validate_addr = eth_validate_addr,
6250 .ndo_set_mac_address = virtnet_set_mac_address,
6251 .ndo_set_rx_mode = virtnet_set_rx_mode,
6252 .ndo_get_stats64 = virtnet_stats,
6253 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
6254 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
6255 .ndo_bpf = virtnet_xdp,
6256 .ndo_xdp_xmit = virtnet_xdp_xmit,
6257 .ndo_xsk_wakeup = virtnet_xsk_wakeup,
6258 .ndo_features_check = passthru_features_check,
6259 .ndo_get_phys_port_name = virtnet_get_phys_port_name,
6260 .ndo_set_features = virtnet_set_features,
6261 .ndo_tx_timeout = virtnet_tx_timeout,
6262};
6263
6264static void virtnet_config_changed_work(struct work_struct *work)
6265{
6266 struct virtnet_info *vi =
6267 container_of(work, struct virtnet_info, config_work);
6268 u16 v;
6269
6270 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
6271 struct virtio_net_config, status, &v) < 0)
6272 return;
6273
6274 if (v & VIRTIO_NET_S_ANNOUNCE) {
		netdev_notify_peers(vi->dev);
6276 virtnet_ack_link_announce(vi);
6277 }
6278
6279 /* Ignore unknown (future) status bits */
6280 v &= VIRTIO_NET_S_LINK_UP;
6281
6282 if (vi->status == v)
6283 return;
6284
6285 vi->status = v;
6286
	if (vi->status & VIRTIO_NET_S_LINK_UP) {
		virtnet_update_settings(vi);
		netif_carrier_on(vi->dev);
		netif_tx_wake_all_queues(vi->dev);
	} else {
		netif_carrier_off(vi->dev);
		netif_tx_stop_all_queues(vi->dev);
6294 }
6295}
6296
6297static void virtnet_config_changed(struct virtio_device *vdev)
6298{
6299 struct virtnet_info *vi = vdev->priv;
6300
	schedule_work(&vi->config_work);
6302}
6303
6304static void virtnet_free_queues(struct virtnet_info *vi)
6305{
6306 int i;
6307
	for (i = 0; i < vi->max_queue_pairs; i++) {
		__netif_napi_del(&vi->rq[i].napi);
		__netif_napi_del(&vi->sq[i].napi);
	}

	/* Since we called __netif_napi_del(), we need to respect an RCU
	 * grace period before freeing vi->rq.
	 */
	synchronize_net();

	kfree(vi->rq);
	kfree(vi->sq);
	kfree(vi->ctrl);
6321}
6322
6323static void _free_receive_bufs(struct virtnet_info *vi)
6324{
6325 struct bpf_prog *old_prog;
6326 int i;
6327
6328 for (i = 0; i < vi->max_queue_pairs; i++) {
6329 while (vi->rq[i].pages)
			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);

		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
		if (old_prog)
			bpf_prog_put(old_prog);
6336 }
6337}
6338
6339static void free_receive_bufs(struct virtnet_info *vi)
6340{
6341 rtnl_lock();
6342 _free_receive_bufs(vi);
6343 rtnl_unlock();
6344}
6345
6346static void free_receive_page_frags(struct virtnet_info *vi)
6347{
6348 int i;
6349 for (i = 0; i < vi->max_queue_pairs; i++)
6350 if (vi->rq[i].alloc_frag.page) {
6351 if (vi->rq[i].last_dma)
				virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
			put_page(vi->rq[i].alloc_frag.page);
6354 }
6355}
6356
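/* Release one unused TX buffer according to how it was packed: an skb, an
 * XDP frame or an XSK descriptor.
 */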
6357static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
6358{
6359 struct virtnet_info *vi = vq->vdev->priv;
6360 struct send_queue *sq;
6361 int i = vq2txq(vq);
6362
6363 sq = &vi->sq[i];
6364
	switch (virtnet_xmit_ptr_unpack(&buf)) {
	case VIRTNET_XMIT_TYPE_SKB:
	case VIRTNET_XMIT_TYPE_SKB_ORPHAN:
		dev_kfree_skb(buf);
		break;

	case VIRTNET_XMIT_TYPE_XDP:
		xdp_return_frame(buf);
		break;

	case VIRTNET_XMIT_TYPE_XSK:
		xsk_tx_completed(sq->xsk_pool, 1);
6377 break;
6378 }
6379}
6380
6381static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq)
6382{
6383 struct virtnet_info *vi = vq->vdev->priv;
6384 int i = vq2txq(vq);
6385
	netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i));
6387}
6388
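/* Drop every buffer still sitting in the TX and RX virtqueues, e.g. before
 * the device is reset or removed.
 */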
6389static void free_unused_bufs(struct virtnet_info *vi)
6390{
6391 void *buf;
6392 int i;
6393
6394 for (i = 0; i < vi->max_queue_pairs; i++) {
6395 struct virtqueue *vq = vi->sq[i].vq;
6396 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
6397 virtnet_sq_free_unused_buf(vq, buf);
6398 cond_resched();
6399 }
6400
6401 for (i = 0; i < vi->max_queue_pairs; i++) {
6402 struct virtqueue *vq = vi->rq[i].vq;
6403
6404 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
6405 virtnet_rq_unmap_free_buf(vq, buf);
6406 cond_resched();
6407 }
6408}
6409
6410static void virtnet_del_vqs(struct virtnet_info *vi)
6411{
6412 struct virtio_device *vdev = vi->vdev;
6413
6414 virtnet_clean_affinity(vi);
6415
6416 vdev->config->del_vqs(vdev);
6417
6418 virtnet_free_queues(vi);
6419}
6420
6421/* How large should a single buffer be so a queue full of these can fit at
6422 * least one full packet?
6423 * Logic below assumes the mergeable buffer header is used.
6424 */
6425static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
6426{
6427 const unsigned int hdr_len = vi->hdr_len;
6428 unsigned int rq_size = virtqueue_get_vring_size(vq);
6429 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
6430 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
6431 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
6432
6433 return max(max(min_buf_len, hdr_len) - hdr_len,
6434 (unsigned int)GOOD_PACKET_LEN);
6435}
6436
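/* Discover and wire up all virtqueues: one RX/TX pair per queue pair plus an
 * optional control vq at the end, in the layout described below.
 */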
6437static int virtnet_find_vqs(struct virtnet_info *vi)
6438{
6439 struct virtqueue_info *vqs_info;
6440 struct virtqueue **vqs;
6441 int ret = -ENOMEM;
6442 int total_vqs;
6443 bool *ctx;
6444 u16 i;
6445
6446 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
6447 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
6448 * possible control vq.
6449 */
6450 total_vqs = vi->max_queue_pairs * 2 +
		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
6452
6453 /* Allocate space for find_vqs parameters */
6454 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
6455 if (!vqs)
6456 goto err_vq;
6457 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL);
6458 if (!vqs_info)
6459 goto err_vqs_info;
6460 if (!vi->big_packets || vi->mergeable_rx_bufs) {
6461 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL);
6462 if (!ctx)
6463 goto err_ctx;
6464 } else {
6465 ctx = NULL;
6466 }
6467
6468 /* Parameters for control virtqueue, if any */
6469 if (vi->has_cvq) {
6470 vqs_info[total_vqs - 1].name = "control";
6471 }
6472
6473 /* Allocate/initialize parameters for send/receive virtqueues */
6474 for (i = 0; i < vi->max_queue_pairs; i++) {
		vqs_info[rxq2vq(i)].callback = skb_recv_done;
		vqs_info[txq2vq(i)].callback = skb_xmit_done;
		sprintf(vi->rq[i].name, "input.%u", i);
		sprintf(vi->sq[i].name, "output.%u", i);
		vqs_info[rxq2vq(i)].name = vi->rq[i].name;
		vqs_info[txq2vq(i)].name = vi->sq[i].name;
		if (ctx)
			vqs_info[rxq2vq(i)].ctx = true;
	}

	ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL);
6486 if (ret)
6487 goto err_find;
6488
6489 if (vi->has_cvq) {
6490 vi->cvq = vqs[total_vqs - 1];
		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
6492 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
6493 }
6494
6495 for (i = 0; i < vi->max_queue_pairs; i++) {
		vi->rq[i].vq = vqs[rxq2vq(i)];
		vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
		vi->sq[i].vq = vqs[txq2vq(i)];
6499 }
6500
	/* Success: fall through with ret == 0 and free the temporary
	 * find_vqs() allocations below.
	 */
6502
6503
6504err_find:
	kfree(ctx);
err_ctx:
	kfree(vqs_info);
err_vqs_info:
	kfree(vqs);
6510err_vq:
6511 return ret;
6512}
6513
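/* Allocate the per-queue-pair software state (rq/sq arrays, NAPI contexts,
 * stats and the optional control buffer) before the virtqueues are found.
 */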
6514static int virtnet_alloc_queues(struct virtnet_info *vi)
6515{
6516 int i;
6517
6518 if (vi->has_cvq) {
6519 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
6520 if (!vi->ctrl)
6521 goto err_ctrl;
6522 } else {
6523 vi->ctrl = NULL;
6524 }
6525 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
6526 if (!vi->sq)
6527 goto err_sq;
6528 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
6529 if (!vi->rq)
6530 goto err_rq;
6531
6532 INIT_DELAYED_WORK(&vi->refill, refill_work);
6533 for (i = 0; i < vi->max_queue_pairs; i++) {
6534 vi->rq[i].pages = NULL;
		netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll,
				      i);
		vi->rq[i].napi.weight = napi_weight;
		netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi,
					 virtnet_poll_tx,
					 napi_tx ? napi_weight : 0);

		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
		ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));

		u64_stats_init(&vi->rq[i].stats.syncp);
		u64_stats_init(&vi->sq[i].stats.syncp);
		mutex_init(&vi->rq[i].dim_lock);
6549 }
6550
6551 return 0;
6552
6553err_rq:
	kfree(vi->sq);
err_sq:
	kfree(vi->ctrl);
6557err_ctrl:
6558 return -ENOMEM;
6559}
6560
6561static int init_vqs(struct virtnet_info *vi)
6562{
6563 int ret;
6564
6565 /* Allocate send & receive queues */
6566 ret = virtnet_alloc_queues(vi);
6567 if (ret)
6568 goto err;
6569
6570 ret = virtnet_find_vqs(vi);
6571 if (ret)
6572 goto err_free;
6573
6574 cpus_read_lock();
6575 virtnet_set_affinity(vi);
6576 cpus_read_unlock();
6577
6578 return 0;
6579
6580err_free:
6581 virtnet_free_queues(vi);
6582err:
6583 return ret;
6584}
6585
6586#ifdef CONFIG_SYSFS
6587static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
6588 char *buf)
6589{
	struct virtnet_info *vi = netdev_priv(queue->dev);
6591 unsigned int queue_index = get_netdev_rx_queue_index(queue);
6592 unsigned int headroom = virtnet_get_headroom(vi);
6593 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
6594 struct ewma_pkt_len *avg;
6595
6596 BUG_ON(queue_index >= vi->max_queue_pairs);
6597 avg = &vi->rq[queue_index].mrg_avg_pkt_len;
	return sprintf(buf, "%u\n",
		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
				       SKB_DATA_ALIGN(headroom + tailroom)));
6601}
6602
6603static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
6604 __ATTR_RO(mergeable_rx_buffer_size);
6605
6606static struct attribute *virtio_net_mrg_rx_attrs[] = {
6607 &mergeable_rx_buffer_size_attribute.attr,
6608 NULL
6609};
6610
6611static const struct attribute_group virtio_net_mrg_rx_group = {
6612 .name = "virtio_net",
6613 .attrs = virtio_net_mrg_rx_attrs
6614};
6615#endif
6616
6617static bool virtnet_fail_on_feature(struct virtio_device *vdev,
6618 unsigned int fbit,
6619 const char *fname, const char *dname)
6620{
6621 if (!virtio_has_feature(vdev, fbit))
6622 return false;
6623
6624 dev_err(&vdev->dev, "device advertises feature %s but not %s",
6625 fname, dname);
6626
6627 return true;
6628}
6629
6630#define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
6631 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
6632
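/* Reject feature combinations that depend on the control virtqueue when
 * VIRTIO_NET_F_CTRL_VQ itself was not offered.
 */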
6633static bool virtnet_validate_features(struct virtio_device *vdev)
6634{
6635 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
6636 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
6637 "VIRTIO_NET_F_CTRL_VQ") ||
6638 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
6639 "VIRTIO_NET_F_CTRL_VQ") ||
6640 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
6641 "VIRTIO_NET_F_CTRL_VQ") ||
6642 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
6643 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
6644 "VIRTIO_NET_F_CTRL_VQ") ||
6645 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
6646 "VIRTIO_NET_F_CTRL_VQ") ||
6647 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
6648 "VIRTIO_NET_F_CTRL_VQ") ||
6649 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
6650 "VIRTIO_NET_F_CTRL_VQ") ||
6651 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL,
6652 "VIRTIO_NET_F_CTRL_VQ"))) {
6653 return false;
6654 }
6655
6656 return true;
6657}
6658
6659#define MIN_MTU ETH_MIN_MTU
6660#define MAX_MTU ETH_MAX_MTU
6661
6662static int virtnet_validate(struct virtio_device *vdev)
6663{
6664 if (!vdev->config->get) {
6665 dev_err(&vdev->dev, "%s failure: config access disabled\n",
6666 __func__);
6667 return -EINVAL;
6668 }
6669
6670 if (!virtnet_validate_features(vdev))
6671 return -EINVAL;
6672
6673 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
6674 int mtu = virtio_cread16(vdev,
6675 offsetof(struct virtio_net_config,
6676 mtu));
6677 if (mtu < MIN_MTU)
6678 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
6679 }
6680
6681 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) &&
6682 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
6683 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby");
6684 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY);
6685 }
6686
6687 return 0;
6688}
6689
6690static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
6691{
	return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
		(virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
		 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
6698}
6699
6700static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
6701{
6702 bool guest_gso = virtnet_check_guest_gso(vi);
6703
	/* If the device can receive ANY guest GSO packets, regardless of MTU,
	 * allocate buffers for packets of maximum size; otherwise limit the
	 * allocation to what an MTU-sized packet needs.
	 */
6708 if (mtu > ETH_DATA_LEN || guest_gso) {
6709 vi->big_packets = true;
6710 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE);
6711 }
6712}
6713
6714#define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10
6715static enum xdp_rss_hash_type
6716virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = {
6717 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE,
6718 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4,
6719 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP,
6720 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP,
6721 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6,
6722 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP,
6723 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP,
6724 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
6725 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
6726 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX
6727};
6728
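/* XDP metadata hook: translate the device-reported hash type and value for
 * bpf_xdp_metadata_rx_hash().
 */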
6729static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
6730 enum xdp_rss_hash_type *rss_type)
6731{
6732 const struct xdp_buff *xdp = (void *)_ctx;
6733 struct virtio_net_hdr_v1_hash *hdr_hash;
6734 struct virtnet_info *vi;
6735 u16 hash_report;
6736
6737 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
6738 return -ENODATA;
6739
	vi = netdev_priv(xdp->rxq->dev);
6741 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
6742 hash_report = __le16_to_cpu(hdr_hash->hash_report);
6743
6744 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE)
6745 hash_report = VIRTIO_NET_HASH_REPORT_NONE;
6746
6747 *rss_type = virtnet_xdp_rss_type[hash_report];
6748 *hash = __le32_to_cpu(hdr_hash->hash_value);
6749 return 0;
6750}
6751
6752static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
6753 .xmo_rx_hash = virtnet_xdp_rx_hash,
6754};
6755
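/* Device probe: read the negotiated features and config space, build the
 * net_device, set up the virtqueues and register the interface.
 */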
6756static int virtnet_probe(struct virtio_device *vdev)
6757{
6758 int i, err = -ENOMEM;
6759 struct net_device *dev;
6760 struct virtnet_info *vi;
6761 u16 max_queue_pairs;
6762 int mtu = 0;
6763
6764 /* Find if host supports multiqueue/rss virtio_net device */
6765 max_queue_pairs = 1;
6766 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
6767 max_queue_pairs =
6768 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));
6769
	/* We need at least 2 queues */
6771 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
6772 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
6773 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
6774 max_queue_pairs = 1;
6775
6776 /* Allocate ourselves a network device with room for our info */
6777 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
6778 if (!dev)
6779 return -ENOMEM;
6780
6781 /* Set up network device as normal. */
6782 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
6783 IFF_TX_SKB_NO_LINEAR;
6784 dev->netdev_ops = &virtnet_netdev;
6785 dev->stat_ops = &virtnet_stat_ops;
6786 dev->features = NETIF_F_HIGHDMA;
6787
6788 dev->ethtool_ops = &virtnet_ethtool_ops;
6789 SET_NETDEV_DEV(dev, &vdev->dev);
6790
6791 /* Do we support "hardware" checksums? */
6792 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
6793 /* This opens up the world of extra features. */
6794 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
6795 if (csum)
6796 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
6797
6798 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
6799 dev->hw_features |= NETIF_F_TSO
6800 | NETIF_F_TSO_ECN | NETIF_F_TSO6;
6801 }
6802 /* Individual feature bits: what can host handle? */
6803 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
6804 dev->hw_features |= NETIF_F_TSO;
6805 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
6806 dev->hw_features |= NETIF_F_TSO6;
6807 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
6808 dev->hw_features |= NETIF_F_TSO_ECN;
6809 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO))
6810 dev->hw_features |= NETIF_F_GSO_UDP_L4;
6811
6812 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) {
6813 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
6814 dev->hw_enc_features = dev->hw_features;
6815 }
6816 if (dev->hw_features & NETIF_F_GSO_UDP_TUNNEL &&
6817 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM)) {
6818 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
6819 dev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
6820 }
6821
6822 dev->features |= NETIF_F_GSO_ROBUST;
6823
6824 if (gso)
6825 dev->features |= dev->hw_features;
6826 /* (!csum && gso) case will be fixed by register_netdev() */
6827 }
6828
6829 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't
6830 * need to calculate checksums for partially checksummed packets,
6831 * as they're considered valid by the upper layer.
6832 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only
6833 * receives fully checksummed packets. The device may assist in
6834 * validating these packets' checksums, so the driver won't have to.
6835 */
6836 dev->features |= NETIF_F_RXCSUM;
6837
6838 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
6839 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
6840 dev->features |= NETIF_F_GRO_HW;
6841 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
6842 dev->hw_features |= NETIF_F_GRO_HW;
6843
6844 dev->vlan_features = dev->features;
6845 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
6846 NETDEV_XDP_ACT_XSK_ZEROCOPY;
6847
6848 /* MTU range: 68 - 65535 */
6849 dev->min_mtu = MIN_MTU;
6850 dev->max_mtu = MAX_MTU;
6851
6852 /* Configuration may specify what MAC to use. Otherwise random. */
6853 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
6854 u8 addr[ETH_ALEN];
6855
6856 virtio_cread_bytes(vdev,
6857 offsetof(struct virtio_net_config, mac),
				   addr, ETH_ALEN);
6859 eth_hw_addr_set(dev, addr);
6860 } else {
6861 eth_hw_addr_random(dev);
6862 dev_info(&vdev->dev, "Assigned random MAC address %pM\n",
6863 dev->dev_addr);
6864 }
6865
6866 /* Set up our device-specific information */
6867 vi = netdev_priv(dev);
6868 vi->dev = dev;
6869 vi->vdev = vdev;
6870 vdev->priv = vi;
6871
6872 INIT_WORK(&vi->config_work, virtnet_config_changed_work);
6873 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
6874 spin_lock_init(&vi->refill_lock);
6875
6876 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
6877 vi->mergeable_rx_bufs = true;
6878 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG;
6879 }
6880
6881 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
6882 vi->has_rss_hash_report = true;
6883
6884 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
6885 vi->has_rss = true;
6886
6887 vi->rss_indir_table_size =
6888 virtio_cread16(vdev, offsetof(struct virtio_net_config,
6889 rss_max_indirection_table_length));
6890 }
	vi->rss_hdr = devm_kzalloc(&vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL);
6892 if (!vi->rss_hdr) {
6893 err = -ENOMEM;
6894 goto free;
6895 }
6896
6897 if (vi->has_rss || vi->has_rss_hash_report) {
6898 vi->rss_key_size =
6899 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
6900 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
6901 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n",
6902 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE);
6903 err = -EINVAL;
6904 goto free;
6905 }
6906
6907 vi->rss_hash_types_supported =
6908 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types));
6909 vi->rss_hash_types_supported &=
6910 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX |
6911 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
6912 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
6913
6914 dev->hw_features |= NETIF_F_RXHASH;
6915 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
6916 }
6917
6918 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) ||
6919 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO))
6920 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel);
6921 else if (vi->has_rss_hash_report)
6922 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
6923 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
6924 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
6925 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
6926 else
6927 vi->hdr_len = sizeof(struct virtio_net_hdr);
6928
6929 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM))
6930 vi->rx_tnl_csum = true;
6931 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO))
6932 vi->rx_tnl = true;
6933 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO))
6934 vi->tx_tnl = true;
6935
6936 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
6937 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
6938 vi->any_header_sg = true;
6939
6940 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
6941 vi->has_cvq = true;
6942
6943 mutex_init(&vi->cvq_lock);
6944
6945 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
6946 mtu = virtio_cread16(vdev,
6947 offsetof(struct virtio_net_config,
6948 mtu));
6949 if (mtu < dev->min_mtu) {
6950 /* Should never trigger: MTU was previously validated
6951 * in virtnet_validate.
6952 */
6953 dev_err(&vdev->dev,
6954 "device MTU appears to have changed it is now %d < %d",
6955 mtu, dev->min_mtu);
6956 err = -EINVAL;
6957 goto free;
6958 }
6959
6960 dev->mtu = mtu;
6961 dev->max_mtu = mtu;
6962 }
6963
6964 virtnet_set_big_packets(vi, mtu);
6965
6966 if (vi->any_header_sg)
6967 dev->needed_headroom = vi->hdr_len;
6968
6969 /* Enable multiqueue by default */
6970 if (num_online_cpus() >= max_queue_pairs)
6971 vi->curr_queue_pairs = max_queue_pairs;
6972 else
6973 vi->curr_queue_pairs = num_online_cpus();
6974 vi->max_queue_pairs = max_queue_pairs;
6975
6976 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
6977 err = init_vqs(vi);
6978 if (err)
6979 goto free;
6980
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
6982 vi->intr_coal_rx.max_usecs = 0;
6983 vi->intr_coal_tx.max_usecs = 0;
6984 vi->intr_coal_rx.max_packets = 0;
6985
6986 /* Keep the default values of the coalescing parameters
6987 * aligned with the default napi_tx state.
6988 */
6989 if (vi->sq[0].napi.weight)
6990 vi->intr_coal_tx.max_packets = 1;
6991 else
6992 vi->intr_coal_tx.max_packets = 0;
6993 }
6994
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
6996 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */
6997 for (i = 0; i < vi->max_queue_pairs; i++)
6998 if (vi->sq[i].napi.weight)
6999 vi->sq[i].intr_coal.max_packets = 1;
7000
7001 err = virtnet_init_irq_moder(vi);
7002 if (err)
7003 goto free;
7004 }
7005
7006#ifdef CONFIG_SYSFS
7007 if (vi->mergeable_rx_bufs)
7008 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
7009#endif
	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
7012
7013 virtnet_init_settings(dev);
7014
7015 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
		vi->failover = net_failover_create(vi->dev);
		if (IS_ERR(vi->failover)) {
			err = PTR_ERR(vi->failover);
7019 goto free_vqs;
7020 }
7021 }
7022
7023 if (vi->has_rss || vi->has_rss_hash_report)
7024 virtnet_init_default_rss(vi);
7025
7026 enable_rx_mode_work(vi);
7027
7028 /* serialize netdev register + virtio_device_ready() with ndo_open() */
7029 rtnl_lock();
7030
7031 err = register_netdevice(dev);
7032 if (err) {
7033 pr_debug("virtio_net: registering device failed\n");
7034 rtnl_unlock();
7035 goto free_failover;
7036 }
7037
7038 /* Disable config change notification until ndo_open. */
	virtio_config_driver_disable(vi->vdev);

	virtio_device_ready(vdev);
7042
7043 if (vi->has_rss || vi->has_rss_hash_report) {
7044 if (!virtnet_commit_rss_command(vi)) {
7045 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n");
7046 dev->hw_features &= ~NETIF_F_RXHASH;
7047 vi->has_rss_hash_report = false;
7048 vi->has_rss = false;
7049 }
7050 }
7051
	virtnet_set_queues(vi, vi->curr_queue_pairs);
7053
	/* A random MAC address has been assigned, notify the device.
	 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
	 * because many devices work fine without getting the MAC explicitly.
	 */
	if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		struct scatterlist sg;

		sg_init_one(&sg, dev->dev_addr, dev->addr_len);
		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
7065 pr_debug("virtio_net: setting MAC address failed\n");
7066 rtnl_unlock();
7067 err = -EINVAL;
7068 goto free_unregister_netdev;
7069 }
7070 }
7071
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) {
7073 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL;
7074 struct scatterlist sg;
7075 __le64 v;
7076
7077 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL);
7078 if (!stats_cap) {
7079 rtnl_unlock();
7080 err = -ENOMEM;
7081 goto free_unregister_netdev;
7082 }
7083
7084 sg_init_one(&sg, stats_cap, sizeof(*stats_cap));
7085
		if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
						VIRTIO_NET_CTRL_STATS_QUERY,
						NULL, &sg)) {
			pr_debug("virtio_net: failed to get stats capability\n");
7090 rtnl_unlock();
7091 err = -EINVAL;
7092 goto free_unregister_netdev;
7093 }
7094
7095 v = stats_cap->supported_stats_types[0];
7096 vi->device_stats_cap = le64_to_cpu(v);
7097 }
7098
	/* Assume link up if device can't report link status,
	 * otherwise get link status from config.
	 */
	netif_carrier_off(dev);
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
		virtio_config_changed(vi->vdev);
7104 } else {
7105 vi->status = VIRTIO_NET_S_LINK_UP;
7106 virtnet_update_settings(vi);
7107 netif_carrier_on(dev);
7108 }
7109
7110 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) {
7111 unsigned int fbit;
7112
		fbit = virtio_offload_to_feature(guest_offloads[i]);
		if (virtio_has_feature(vi->vdev, fbit))
			set_bit(guest_offloads[i], &vi->guest_offloads);
7116 }
7117 vi->guest_offloads_capable = vi->guest_offloads;
7118
7119 rtnl_unlock();
7120
7121 err = virtnet_cpu_notif_add(vi);
7122 if (err) {
7123 pr_debug("virtio_net: registering cpu notifier failed\n");
7124 goto free_unregister_netdev;
7125 }
7126
7127 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
7128 dev->name, max_queue_pairs);
7129
7130 return 0;
7131
7132free_unregister_netdev:
7133 unregister_netdev(dev);
7134free_failover:
	net_failover_destroy(vi->failover);
free_vqs:
	virtio_reset_device(vdev);
	cancel_delayed_work_sync(&vi->refill);
	free_receive_page_frags(vi);
	virtnet_del_vqs(vi);
free:
	free_netdev(dev);
7143 return err;
7144}
7145
7146static void remove_vq_common(struct virtnet_info *vi)
7147{
7148 int i;
7149
	virtio_reset_device(vi->vdev);

	/* Free unused buffers in both send and recv, if any. */
	free_unused_bufs(vi);

	/*
	 * Rule of thumb: netdev_tx_reset_queue() should follow any skb
	 * freeing that is not followed by netdev_tx_completed_queue().
	 */
	for (i = 0; i < vi->max_queue_pairs; i++)
		netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i));
7161
7162 free_receive_bufs(vi);
7163
7164 free_receive_page_frags(vi);
7165
7166 virtnet_del_vqs(vi);
7167}
7168
7169static void virtnet_remove(struct virtio_device *vdev)
7170{
7171 struct virtnet_info *vi = vdev->priv;
7172
7173 virtnet_cpu_notif_remove(vi);
7174
7175 /* Make sure no work handler is accessing the device. */
	flush_work(&vi->config_work);
	disable_rx_mode_work(vi);
	flush_work(&vi->rx_mode_work);

	virtnet_free_irq_moder(vi);

	unregister_netdev(vi->dev);

	net_failover_destroy(vi->failover);

	remove_vq_common(vi);

	free_netdev(vi->dev);
7189}
7190
7191static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
7192{
7193 struct virtnet_info *vi = vdev->priv;
7194
7195 virtnet_cpu_notif_remove(vi);
7196 virtnet_freeze_down(vdev);
7197 remove_vq_common(vi);
7198
7199 return 0;
7200}
7201
7202static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
7203{
7204 struct virtnet_info *vi = vdev->priv;
7205 int err;
7206
7207 err = virtnet_restore_up(vdev);
7208 if (err)
7209 return err;
	virtnet_set_queues(vi, vi->curr_queue_pairs);
7211
7212 err = virtnet_cpu_notif_add(vi);
7213 if (err) {
7214 virtnet_freeze_down(vdev);
7215 remove_vq_common(vi);
7216 return err;
7217 }
7218
7219 return 0;
7220}
7221
7222static struct virtio_device_id id_table[] = {
7223 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
7224 { 0 },
7225};
7226
7227#define VIRTNET_FEATURES \
7228 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
7229 VIRTIO_NET_F_MAC, \
7230 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
7231 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
7232 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
7233 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
7234 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
7235 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
7236 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
7237 VIRTIO_NET_F_CTRL_MAC_ADDR, \
7238 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
7239 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
7240 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
7241 VIRTIO_NET_F_VQ_NOTF_COAL, \
7242 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS
7243
7244static unsigned int features[] = {
7245 VIRTNET_FEATURES,
7246 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO,
7247 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM,
7248 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO,
7249 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM,
7250};
7251
7252static unsigned int features_legacy[] = {
7253 VIRTNET_FEATURES,
7254 VIRTIO_NET_F_GSO,
7255 VIRTIO_F_ANY_LAYOUT,
7256};
7257
7258static struct virtio_driver virtio_net_driver = {
7259 .feature_table = features,
7260 .feature_table_size = ARRAY_SIZE(features),
7261 .feature_table_legacy = features_legacy,
7262 .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
7263 .driver.name = KBUILD_MODNAME,
7264 .id_table = id_table,
7265 .validate = virtnet_validate,
7266 .probe = virtnet_probe,
7267 .remove = virtnet_remove,
7268 .config_changed = virtnet_config_changed,
7269#ifdef CONFIG_PM_SLEEP
7270 .freeze = virtnet_freeze,
7271 .restore = virtnet_restore,
7272#endif
7273};
7274
7275static __init int virtio_net_driver_init(void)
7276{
7277 int ret;
7278
	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
				      virtnet_cpu_online,
				      virtnet_cpu_down_prep);
	if (ret < 0)
		goto out;
	virtionet_online = ret;
	ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
				      NULL, virtnet_cpu_dead);
	if (ret)
		goto err_dead;
	ret = register_virtio_driver(&virtio_net_driver);
	if (ret)
		goto err_virtio;
	return 0;
err_virtio:
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
err_dead:
	cpuhp_remove_multi_state(virtionet_online);
7297out:
7298 return ret;
7299}
7300module_init(virtio_net_driver_init);
7301
7302static __exit void virtio_net_driver_exit(void)
7303{
	unregister_virtio_driver(&virtio_net_driver);
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
	cpuhp_remove_multi_state(virtionet_online);
7307}
7308module_exit(virtio_net_driver_exit);
7309
7310MODULE_DEVICE_TABLE(virtio, id_table);
7311MODULE_DESCRIPTION("Virtio network driver");
7312MODULE_LICENSE("GPL");
7313