1// SPDX-License-Identifier: GPL-2.0-or-later
2/* A network driver using virtio.
3 *
4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
5 */
6//#define DEBUG
7#include <linux/netdevice.h>
8#include <linux/etherdevice.h>
9#include <linux/ethtool.h>
10#include <linux/module.h>
11#include <linux/virtio.h>
12#include <linux/virtio_net.h>
13#include <linux/bpf.h>
14#include <linux/bpf_trace.h>
15#include <linux/scatterlist.h>
16#include <linux/if_vlan.h>
17#include <linux/slab.h>
18#include <linux/cpu.h>
19#include <linux/average.h>
20#include <linux/filter.h>
21#include <linux/kernel.h>
22#include <linux/dim.h>
23#include <net/route.h>
24#include <net/xdp.h>
25#include <net/net_failover.h>
26#include <net/netdev_rx_queue.h>
27#include <net/netdev_queues.h>
28#include <net/xdp_sock_drv.h>
29
30static int napi_weight = NAPI_POLL_WEIGHT;
31module_param(napi_weight, int, 0444);
32
33static bool csum = true, gso = true, napi_tx = true;
34module_param(csum, bool, 0444);
35module_param(gso, bool, 0444);
36module_param(napi_tx, bool, 0644);
37
38#define VIRTIO_OFFLOAD_MAP_MIN 46
39#define VIRTIO_OFFLOAD_MAP_MAX 47
40#define VIRTIO_FEATURES_MAP_MIN 65
41#define VIRTIO_O2F_DELTA (VIRTIO_FEATURES_MAP_MIN - \
42 VIRTIO_OFFLOAD_MAP_MIN)
43
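/*
 * The two UDP tunnel GSO guest offloads correspond to device feature bits at
 * or above VIRTIO_FEATURES_MAP_MIN (65), which do not fit into the 64-bit
 * offload masks used below, so they are tracked as "mapped" bits 46 and 47
 * instead. With the values above VIRTIO_O2F_DELTA is 19, so for example
 * virtio_offload_to_feature(46) == 65 and virtio_offload_to_feature(47) == 66,
 * while every other bit is returned unchanged by the helpers below.
 */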
44static bool virtio_is_mapped_offload(unsigned int obit)
45{
46 return obit >= VIRTIO_OFFLOAD_MAP_MIN &&
47 obit <= VIRTIO_OFFLOAD_MAP_MAX;
48}
49
50static unsigned int virtio_offload_to_feature(unsigned int obit)
51{
52 return virtio_is_mapped_offload(obit) ? obit + VIRTIO_O2F_DELTA : obit;
53}
54
55/* FIXME: MTU in config. */
56#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
57#define GOOD_COPY_LEN 128
58
59#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
60
61/* Separating two types of XDP xmit */
62#define VIRTIO_XDP_TX BIT(0)
63#define VIRTIO_XDP_REDIR BIT(1)
64
65/* RX packet size EWMA. The average packet size is used to determine the packet
66 * buffer size when refilling RX rings. As the entire RX ring may be refilled
67 * at once, the weight is chosen so that the EWMA will be insensitive to short-
68 * term, transient changes in packet size.
69 */
70DECLARE_EWMA(pkt_len, 0, 64)
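/*
 * Note: the (0, 64) arguments give 0 bits of fractional precision and a weight
 * reciprocal of 64, i.e. each new sample moves the average by roughly 1/64th
 * of its distance from the current value.
 */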
71
72#define VIRTNET_DRIVER_VERSION "1.0.0"
73
74static const unsigned long guest_offloads[] = {
75 VIRTIO_NET_F_GUEST_TSO4,
76 VIRTIO_NET_F_GUEST_TSO6,
77 VIRTIO_NET_F_GUEST_ECN,
78 VIRTIO_NET_F_GUEST_UFO,
79 VIRTIO_NET_F_GUEST_CSUM,
80 VIRTIO_NET_F_GUEST_USO4,
81 VIRTIO_NET_F_GUEST_USO6,
82 VIRTIO_NET_F_GUEST_HDRLEN,
83 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED,
84 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED,
85};
86
87#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
88 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
89 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
90 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \
91 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
92 (1ULL << VIRTIO_NET_F_GUEST_USO6) | \
93 (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) | \
94 (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED))
95
96struct virtnet_stat_desc {
97 char desc[ETH_GSTRING_LEN];
98 size_t offset;
99 size_t qstat_offset;
100};
101
102struct virtnet_sq_free_stats {
103 u64 packets;
104 u64 bytes;
105 u64 napi_packets;
106 u64 napi_bytes;
107 u64 xsk;
108};
109
110struct virtnet_sq_stats {
111 struct u64_stats_sync syncp;
112 u64_stats_t packets;
113 u64_stats_t bytes;
114 u64_stats_t xdp_tx;
115 u64_stats_t xdp_tx_drops;
116 u64_stats_t kicks;
117 u64_stats_t tx_timeouts;
118 u64_stats_t stop;
119 u64_stats_t wake;
120};
121
122struct virtnet_rq_stats {
123 struct u64_stats_sync syncp;
124 u64_stats_t packets;
125 u64_stats_t bytes;
126 u64_stats_t drops;
127 u64_stats_t xdp_packets;
128 u64_stats_t xdp_tx;
129 u64_stats_t xdp_redirects;
130 u64_stats_t xdp_drops;
131 u64_stats_t kicks;
132};
133
134#define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1}
135#define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1}
136
137#define VIRTNET_SQ_STAT_QSTAT(name, m) \
138 { \
139 name, \
140 offsetof(struct virtnet_sq_stats, m), \
141 offsetof(struct netdev_queue_stats_tx, m), \
142 }
143
144#define VIRTNET_RQ_STAT_QSTAT(name, m) \
145 { \
146 name, \
147 offsetof(struct virtnet_rq_stats, m), \
148 offsetof(struct netdev_queue_stats_rx, m), \
149 }
150
151static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
152 VIRTNET_SQ_STAT("xdp_tx", xdp_tx),
153 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops),
154 VIRTNET_SQ_STAT("kicks", kicks),
155 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts),
156};
157
158static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
159 VIRTNET_RQ_STAT("drops", drops),
160 VIRTNET_RQ_STAT("xdp_packets", xdp_packets),
161 VIRTNET_RQ_STAT("xdp_tx", xdp_tx),
162 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects),
163 VIRTNET_RQ_STAT("xdp_drops", xdp_drops),
164 VIRTNET_RQ_STAT("kicks", kicks),
165};
166
167static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = {
168 VIRTNET_SQ_STAT_QSTAT("packets", packets),
169 VIRTNET_SQ_STAT_QSTAT("bytes", bytes),
170 VIRTNET_SQ_STAT_QSTAT("stop", stop),
171 VIRTNET_SQ_STAT_QSTAT("wake", wake),
172};
173
174static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = {
175 VIRTNET_RQ_STAT_QSTAT("packets", packets),
176 VIRTNET_RQ_STAT_QSTAT("bytes", bytes),
177};
178
179#define VIRTNET_STATS_DESC_CQ(name) \
180 {#name, offsetof(struct virtio_net_stats_cvq, name), -1}
181
182#define VIRTNET_STATS_DESC_RX(class, name) \
183 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1}
184
185#define VIRTNET_STATS_DESC_TX(class, name) \
186 {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1}
187
188
189static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
190 VIRTNET_STATS_DESC_CQ(command_num),
191 VIRTNET_STATS_DESC_CQ(ok_num),
192};
193
194static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
195 VIRTNET_STATS_DESC_RX(basic, packets),
196 VIRTNET_STATS_DESC_RX(basic, bytes),
197
198 VIRTNET_STATS_DESC_RX(basic, notifications),
199 VIRTNET_STATS_DESC_RX(basic, interrupts),
200};
201
202static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
203 VIRTNET_STATS_DESC_TX(basic, packets),
204 VIRTNET_STATS_DESC_TX(basic, bytes),
205
206 VIRTNET_STATS_DESC_TX(basic, notifications),
207 VIRTNET_STATS_DESC_TX(basic, interrupts),
208};
209
210static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
211 VIRTNET_STATS_DESC_RX(csum, needs_csum),
212};
213
214static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
215 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg),
216 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg),
217};
218
219static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
220 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes),
221};
222
223static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
224 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes),
225};
226
227#define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \
228 { \
229 #name, \
230 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \
231 offsetof(struct netdev_queue_stats_rx, qstat_field), \
232 }
233
234#define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \
235 { \
236 #name, \
237 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \
238 offsetof(struct netdev_queue_stats_tx, qstat_field), \
239 }
240
241static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = {
242 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops),
243 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns),
244};
245
246static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = {
247 VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops),
248 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors),
249};
250
251static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = {
252 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary),
253 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none),
254 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad),
255};
256
257static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = {
258 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none),
259 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum),
260};
261
262static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = {
263 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets),
264 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes),
265 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets),
266 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes),
267};
268
269static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = {
270 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets),
271 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes),
272 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets),
273 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes),
274};
275
276static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = {
277 VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
278};
279
280static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = {
281 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
282};
283
284#define VIRTNET_Q_TYPE_RX 0
285#define VIRTNET_Q_TYPE_TX 1
286#define VIRTNET_Q_TYPE_CQ 2
287
288struct virtnet_interrupt_coalesce {
289 u32 max_packets;
290 u32 max_usecs;
291};
292
/* DMA info shared by the rx buffers carved out of a single page_frag
 * allocation (see virtnet_rq_alloc()).
 */
294struct virtnet_rq_dma {
295 dma_addr_t addr;
296 u32 ref;
297 u16 len;
298 u16 need_sync;
299};
300
301/* Internal representation of a send virtqueue */
302struct send_queue {
	/* Virtqueue associated with this send_queue */
304 struct virtqueue *vq;
305
306 /* TX: fragments + linear part + virtio header */
307 struct scatterlist sg[MAX_SKB_FRAGS + 2];
308
309 /* Name of the send queue: output.$index */
310 char name[16];
311
312 struct virtnet_sq_stats stats;
313
314 struct virtnet_interrupt_coalesce intr_coal;
315
316 struct napi_struct napi;
317
318 /* Record whether sq is in reset state. */
319 bool reset;
320
321 struct xsk_buff_pool *xsk_pool;
322
323 dma_addr_t xsk_hdr_dma_addr;
324};
325
326/* Internal representation of a receive virtqueue */
327struct receive_queue {
328 /* Virtqueue associated with this receive_queue */
329 struct virtqueue *vq;
330
331 struct napi_struct napi;
332
333 struct bpf_prog __rcu *xdp_prog;
334
335 struct virtnet_rq_stats stats;
336
337 /* The number of rx notifications */
338 u16 calls;
339
340 /* Is dynamic interrupt moderation enabled? */
341 bool dim_enabled;
342
	/* Protects dim_enabled and intr_coal */
344 struct mutex dim_lock;
345
346 /* Dynamic Interrupt Moderation */
347 struct dim dim;
348
349 u32 packets_in_napi;
350
351 struct virtnet_interrupt_coalesce intr_coal;
352
353 /* Chain pages by the private ptr. */
354 struct page *pages;
355
356 /* Average packet length for mergeable receive buffers. */
357 struct ewma_pkt_len mrg_avg_pkt_len;
358
359 /* Page frag for packet buffer allocation. */
360 struct page_frag alloc_frag;
361
362 /* RX: fragments + linear part + virtio header */
363 struct scatterlist sg[MAX_SKB_FRAGS + 2];
364
365 /* Min single buffer size for mergeable buffers case. */
366 unsigned int min_buf_len;
367
368 /* Name of this receive queue: input.$index */
369 char name[16];
370
371 struct xdp_rxq_info xdp_rxq;
372
	/* Record the last dma info; it is freed once a new page has been allocated. */
374 struct virtnet_rq_dma *last_dma;
375
376 struct xsk_buff_pool *xsk_pool;
377
378 /* xdp rxq used by xsk */
379 struct xdp_rxq_info xsk_rxq_info;
380
381 struct xdp_buff **xsk_buffs;
382};
383
384#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
385
386/* Control VQ buffers: protected by the rtnl lock */
387struct control_buf {
388 struct virtio_net_ctrl_hdr hdr;
389 virtio_net_ctrl_ack status;
390};
391
392struct virtnet_info {
393 struct virtio_device *vdev;
394 struct virtqueue *cvq;
395 struct net_device *dev;
396 struct send_queue *sq;
397 struct receive_queue *rq;
398 unsigned int status;
399
400 /* Max # of queue pairs supported by the device */
401 u16 max_queue_pairs;
402
403 /* # of queue pairs currently used by the driver */
404 u16 curr_queue_pairs;
405
406 /* # of XDP queue pairs currently used by the driver */
407 u16 xdp_queue_pairs;
408
	/* xdp_queue_pairs may be 0 even when an XDP program is loaded,
	 * so track the XDP state separately.
	 */
410 bool xdp_enabled;
411
412 /* I like... big packets and I cannot lie! */
413 bool big_packets;
414
415 /* number of sg entries allocated for big packets */
416 unsigned int big_packets_num_skbfrags;
417
418 /* Host will merge rx buffers for big packets (shake it! shake it!) */
419 bool mergeable_rx_bufs;
420
421 /* Host supports rss and/or hash report */
422 bool has_rss;
423 bool has_rss_hash_report;
424 u8 rss_key_size;
425 u16 rss_indir_table_size;
426 u32 rss_hash_types_supported;
427 u32 rss_hash_types_saved;
428 struct virtio_net_rss_config_hdr *rss_hdr;
429 struct virtio_net_rss_config_trailer rss_trailer;
430 u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];
431
432 /* Has control virtqueue */
433 bool has_cvq;
434
435 /* Lock to protect the control VQ */
436 struct mutex cvq_lock;
437
438 /* Host can handle any s/g split between our header and packet data */
439 bool any_header_sg;
440
441 /* Packet virtio header size */
442 u8 hdr_len;
443
444 /* Work struct for delayed refilling if we run low on memory. */
445 struct delayed_work refill;
446
447 /* UDP tunnel support */
448 bool tx_tnl;
449
450 bool rx_tnl;
451
452 bool rx_tnl_csum;
453
454 /* Is delayed refill enabled? */
455 bool refill_enabled;
456
457 /* The lock to synchronize the access to refill_enabled */
458 spinlock_t refill_lock;
459
460 /* Work struct for config space updates */
461 struct work_struct config_work;
462
463 /* Work struct for setting rx mode */
464 struct work_struct rx_mode_work;
465
466 /* OK to queue work setting RX mode? */
467 bool rx_mode_work_enabled;
468
	/* Is the affinity hint set for the virtqueues? */
470 bool affinity_hint_set;
471
472 /* CPU hotplug instances for online & dead */
473 struct hlist_node node;
474 struct hlist_node node_dead;
475
476 struct control_buf *ctrl;
477
478 /* Ethtool settings */
479 u8 duplex;
480 u32 speed;
481
482 /* Is rx dynamic interrupt moderation enabled? */
483 bool rx_dim_enabled;
484
485 /* Interrupt coalescing settings */
486 struct virtnet_interrupt_coalesce intr_coal_tx;
487 struct virtnet_interrupt_coalesce intr_coal_rx;
488
489 unsigned long guest_offloads;
490 unsigned long guest_offloads_capable;
491
492 /* failover when STANDBY feature enabled */
493 struct failover *failover;
494
495 u64 device_stats_cap;
496};
497
498struct padded_vnet_hdr {
499 struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and the data sg buffer shares the
	 * same page with this header sg. The padding makes the next sg
	 * 16-byte aligned after the header.
	 */
505 char padding[12];
506};
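/*
 * For reference: sizeof(struct virtio_net_hdr_v1_hash) is 20 bytes, so the
 * 12 bytes of padding above bring the total to 32, which keeps the data sg
 * entry 16-byte aligned when it shares a page with the header.
 */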
507
508struct virtio_net_common_hdr {
509 union {
510 struct virtio_net_hdr hdr;
511 struct virtio_net_hdr_mrg_rxbuf mrg_hdr;
512 struct virtio_net_hdr_v1_hash hash_v1_hdr;
513 struct virtio_net_hdr_v1_hash_tunnel tnl_hdr;
514 };
515};
516
517static struct virtio_net_common_hdr xsk_hdr;
518
519static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
520static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq);
521static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
522 struct net_device *dev,
523 unsigned int *xdp_xmit,
524 struct virtnet_rq_stats *stats);
525static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
526 struct sk_buff *skb, u8 flags);
527static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
528 struct sk_buff *curr_skb,
529 struct page *page, void *buf,
530 int len, int truesize);
531static void virtnet_xsk_completed(struct send_queue *sq, int num);
532
533enum virtnet_xmit_type {
534 VIRTNET_XMIT_TYPE_SKB,
535 VIRTNET_XMIT_TYPE_SKB_ORPHAN,
536 VIRTNET_XMIT_TYPE_XDP,
537 VIRTNET_XMIT_TYPE_XSK,
538};
539
540static size_t virtnet_rss_hdr_size(const struct virtnet_info *vi)
541{
542 u16 indir_table_size = vi->has_rss ? vi->rss_indir_table_size : 1;
543
544 return struct_size(vi->rss_hdr, indirection_table, indir_table_size);
545}
546
547static size_t virtnet_rss_trailer_size(const struct virtnet_info *vi)
548{
549 return struct_size(&vi->rss_trailer, hash_key_data, vi->rss_key_size);
550}
551
552/* We use the last two bits of the pointer to distinguish the xmit type. */
553#define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1))
554
555#define VIRTIO_XSK_FLAG_OFFSET 2
556
557static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr)
558{
559 unsigned long p = (unsigned long)*ptr;
560
561 *ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK);
562
563 return p & VIRTNET_XMIT_TYPE_MASK;
564}
565
566static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type)
567{
568 return (void *)((unsigned long)ptr | type);
569}
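/*
 * Illustrative round trip: virtnet_xmit_ptr_pack(xdpf, VIRTNET_XMIT_TYPE_XDP)
 * sets the two low bits of the (at least 4-byte aligned) pointer to 0b10;
 * virtnet_xmit_ptr_unpack() clears them again and reports
 * VIRTNET_XMIT_TYPE_XDP, so both the original pointer and its type survive the
 * trip through the virtqueue token.
 */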
570
static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
			      enum virtnet_xmit_type type)
{
	return virtqueue_add_outbuf(sq->vq, sq->sg, num,
				    virtnet_xmit_ptr_pack(data, type),
				    GFP_ATOMIC);
}
578
579static u32 virtnet_ptr_to_xsk_buff_len(void *ptr)
580{
581 return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
582}
583
584static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
585{
586 sg_dma_address(sg) = addr;
587 sg_dma_len(sg) = len;
588}
589
590static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
591 bool in_napi, struct virtnet_sq_free_stats *stats)
592{
593 struct xdp_frame *frame;
594 struct sk_buff *skb;
595 unsigned int len;
596 void *ptr;
597
	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		switch (virtnet_xmit_ptr_unpack(&ptr)) {
		case VIRTNET_XMIT_TYPE_SKB:
			skb = ptr;

			pr_debug("Sent skb %p\n", skb);
			stats->napi_packets++;
			stats->napi_bytes += skb->len;
			napi_consume_skb(skb, in_napi);
			break;

		case VIRTNET_XMIT_TYPE_SKB_ORPHAN:
			skb = ptr;

			stats->packets++;
			stats->bytes += skb->len;
			napi_consume_skb(skb, in_napi);
			break;

		case VIRTNET_XMIT_TYPE_XDP:
			frame = ptr;

			stats->packets++;
			stats->bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);
			break;

		case VIRTNET_XMIT_TYPE_XSK:
			stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr);
			stats->xsk++;
			break;
		}
	}
	netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
632}
633
634static void virtnet_free_old_xmit(struct send_queue *sq,
635 struct netdev_queue *txq,
636 bool in_napi,
637 struct virtnet_sq_free_stats *stats)
638{
639 __free_old_xmit(sq, txq, in_napi, stats);
640
641 if (stats->xsk)
		virtnet_xsk_completed(sq, stats->xsk);
643}
644
645/* Converting between virtqueue no. and kernel tx/rx queue no.
646 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
647 */
648static int vq2txq(struct virtqueue *vq)
649{
650 return (vq->index - 1) / 2;
651}
652
653static int txq2vq(int txq)
654{
655 return txq * 2 + 1;
656}
657
658static int vq2rxq(struct virtqueue *vq)
659{
660 return vq->index / 2;
661}
662
663static int rxq2vq(int rxq)
664{
665 return rxq * 2;
666}
667
668static int vq_type(struct virtnet_info *vi, int qid)
669{
670 if (qid == vi->max_queue_pairs * 2)
671 return VIRTNET_Q_TYPE_CQ;
672
673 if (qid % 2)
674 return VIRTNET_Q_TYPE_TX;
675
676 return VIRTNET_Q_TYPE_RX;
677}
678
679static inline struct virtio_net_common_hdr *
680skb_vnet_common_hdr(struct sk_buff *skb)
681{
682 return (struct virtio_net_common_hdr *)skb->cb;
683}
684
/*
 * private is used to chain pages for big packets; put the whole
 * most recently used list at the beginning for reuse.
 */
689static void give_pages(struct receive_queue *rq, struct page *page)
690{
691 struct page *end;
692
693 /* Find end of list, sew whole thing into vi->rq.pages. */
694 for (end = page; end->private; end = (struct page *)end->private);
695 end->private = (unsigned long)rq->pages;
696 rq->pages = page;
697}
698
699static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
700{
701 struct page *p = rq->pages;
702
703 if (p) {
704 rq->pages = (struct page *)p->private;
705 /* clear private here, it is used to chain pages */
706 p->private = 0;
707 } else
708 p = alloc_page(gfp_mask);
709 return p;
710}
711
static void virtnet_rq_free_buf(struct virtnet_info *vi,
				struct receive_queue *rq, void *buf)
{
	if (vi->mergeable_rx_bufs)
		put_page(virt_to_head_page(buf));
	else if (vi->big_packets)
		give_pages(rq, buf);
	else
		put_page(virt_to_head_page(buf));
}
722
static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}
736
737static void enable_rx_mode_work(struct virtnet_info *vi)
738{
739 rtnl_lock();
740 vi->rx_mode_work_enabled = true;
741 rtnl_unlock();
742}
743
744static void disable_rx_mode_work(struct virtnet_info *vi)
745{
746 rtnl_lock();
747 vi->rx_mode_work_enabled = false;
748 rtnl_unlock();
749}
750
static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		virtqueue_disable_cb(vq);
		__napi_schedule(napi);
	}
}
759
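/* Prepare to re-enable virtqueue callbacks before completing NAPI; if
 * virtqueue_poll() then reports that more buffers arrived in that window,
 * reschedule NAPI right away instead of risking a missed interrupt.
 */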
760static bool virtqueue_napi_complete(struct napi_struct *napi,
761 struct virtqueue *vq, int processed)
762{
763 int opaque;
764
765 opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed)) {
767 if (unlikely(virtqueue_poll(vq, opaque)))
768 virtqueue_napi_schedule(napi, vq);
769 else
770 return true;
771 } else {
772 virtqueue_disable_cb(vq);
773 }
774
775 return false;
776}
777
778static void skb_xmit_done(struct virtqueue *vq)
779{
780 struct virtnet_info *vi = vq->vdev->priv;
781 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;
782
783 /* Suppress further interrupts. */
784 virtqueue_disable_cb(vq);
785
786 if (napi->weight)
787 virtqueue_napi_schedule(napi, vq);
788 else
789 /* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
791}
792
793#define MRG_CTX_HEADER_SHIFT 22
794static void *mergeable_len_to_ctx(unsigned int truesize,
795 unsigned int headroom)
796{
797 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
798}
799
800static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
801{
802 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
803}
804
805static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
806{
807 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
808}
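/*
 * Example encoding: a buffer with truesize 2048 and 256 bytes of headroom is
 * stored as the context value (256 << 22) | 2048; the helpers above recover
 * 256 and 2048 respectively. truesize is therefore limited to 22 bits.
 */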
809
810static int check_mergeable_len(struct net_device *dev, void *mrg_ctx,
811 unsigned int len)
812{
813 unsigned int headroom, tailroom, room, truesize;
814
815 truesize = mergeable_ctx_to_truesize(mrg_ctx);
816 headroom = mergeable_ctx_to_headroom(mrg_ctx);
817 tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
818 room = SKB_DATA_ALIGN(headroom + tailroom);
819
820 if (len > truesize - room) {
821 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
822 dev->name, len, (unsigned long)(truesize - room));
823 DEV_STATS_INC(dev, rx_length_errors);
824 return -1;
825 }
826
827 return 0;
828}
829
830static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
831 unsigned int headroom,
832 unsigned int len)
833{
834 struct sk_buff *skb;
835
836 skb = build_skb(data: buf, frag_size: buflen);
837 if (unlikely(!skb))
838 return NULL;
839
840 skb_reserve(skb, len: headroom);
841 skb_put(skb, len);
842
843 return skb;
844}
845
846/* Called from bottom half context */
847static struct sk_buff *page_to_skb(struct virtnet_info *vi,
848 struct receive_queue *rq,
849 struct page *page, unsigned int offset,
850 unsigned int len, unsigned int truesize,
851 unsigned int headroom)
852{
853 struct sk_buff *skb;
854 struct virtio_net_common_hdr *hdr;
855 unsigned int copy, hdr_len, hdr_padded_len;
856 struct page *page_to_free = NULL;
857 int tailroom, shinfo_size;
858 char *p, *hdr_p, *buf;
859
860 p = page_address(page) + offset;
861 hdr_p = p;
862
863 hdr_len = vi->hdr_len;
864 if (vi->mergeable_rx_bufs)
865 hdr_padded_len = hdr_len;
866 else
867 hdr_padded_len = sizeof(struct padded_vnet_hdr);
868
869 buf = p - headroom;
870 len -= hdr_len;
871 offset += hdr_padded_len;
872 p += hdr_padded_len;
873 tailroom = truesize - headroom - hdr_padded_len - len;
874
875 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
876
877 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
878 skb = virtnet_build_skb(buf, buflen: truesize, headroom: p - buf, len);
879 if (unlikely(!skb))
880 return NULL;
881
882 page = (struct page *)page->private;
883 if (page)
884 give_pages(rq, page);
885 goto ok;
886 }
887
888 /* copy small packet so we can reuse these pages for small data */
889 skb = napi_alloc_skb(napi: &rq->napi, GOOD_COPY_LEN);
890 if (unlikely(!skb))
891 return NULL;
892
893 /* Copy all frame if it fits skb->head, otherwise
894 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
895 */
896 if (len <= skb_tailroom(skb))
897 copy = len;
898 else
899 copy = ETH_HLEN;
900 skb_put_data(skb, data: p, len: copy);
901
902 len -= copy;
903 offset += copy;
904
905 if (vi->mergeable_rx_bufs) {
906 if (len)
907 skb_add_rx_frag(skb, i: 0, page, off: offset, size: len, truesize);
908 else
909 page_to_free = page;
910 goto ok;
911 }
912
913 /*
914 * Verify that we can indeed put this data into a skb.
915 * This is here to handle cases when the device erroneously
916 * tries to receive more than is possible. This is usually
917 * the case of a broken device.
918 */
919 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
920 net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
921 dev_kfree_skb(skb);
922 return NULL;
923 }
924 BUG_ON(offset >= PAGE_SIZE);
925 while (len) {
926 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
927 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, off: offset,
928 size: frag_size, truesize);
929 len -= frag_size;
930 page = (struct page *)page->private;
931 offset = 0;
932 }
933
934 if (page)
935 give_pages(rq, page);
936
937ok:
938 hdr = skb_vnet_common_hdr(skb);
939 memcpy(to: hdr, from: hdr_p, len: hdr_len);
940 if (page_to_free)
941 put_page(page: page_to_free);
942
943 return skb;
944}
945
946static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
947{
948 struct virtnet_info *vi = rq->vq->vdev->priv;
949 struct page *page = virt_to_head_page(x: buf);
950 struct virtnet_rq_dma *dma;
951 void *head;
952 int offset;
953
954 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);
955
956 head = page_address(page);
957
958 dma = head;
959
960 --dma->ref;
961
962 if (dma->need_sync && len) {
963 offset = buf - (head + sizeof(*dma));
964
965 virtqueue_map_sync_single_range_for_cpu(vq: rq->vq, addr: dma->addr,
966 offset, size: len,
967 dir: DMA_FROM_DEVICE);
968 }
969
970 if (dma->ref)
971 return;
972
973 virtqueue_unmap_single_attrs(vq: rq->vq, addr: dma->addr, size: dma->len,
974 dir: DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
975 put_page(page);
976}
977
978static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
979{
980 struct virtnet_info *vi = rq->vq->vdev->priv;
981 void *buf;
982
983 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);
984
985 buf = virtqueue_get_buf_ctx(vq: rq->vq, len, ctx);
986 if (buf)
987 virtnet_rq_unmap(rq, buf, len: *len);
988
989 return buf;
990}
991
992static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
993{
994 struct virtnet_info *vi = rq->vq->vdev->priv;
995 struct virtnet_rq_dma *dma;
996 dma_addr_t addr;
997 u32 offset;
998 void *head;
999
1000 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);
1001
1002 head = page_address(rq->alloc_frag.page);
1003
1004 offset = buf - head;
1005
1006 dma = head;
1007
1008 addr = dma->addr - sizeof(*dma) + offset;
1009
1010 sg_init_table(rq->sg, 1);
1011 sg_fill_dma(sg: rq->sg, addr, len);
1012}
1013
1014static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
1015{
1016 struct page_frag *alloc_frag = &rq->alloc_frag;
1017 struct virtnet_info *vi = rq->vq->vdev->priv;
1018 struct virtnet_rq_dma *dma;
1019 void *buf, *head;
1020 dma_addr_t addr;
1021
1022 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs);
1023
1024 head = page_address(alloc_frag->page);
1025
1026 dma = head;
1027
1028 /* new pages */
1029 if (!alloc_frag->offset) {
		if (rq->last_dma) {
			/* Now that a new page has been allocated, the last
			 * dma will no longer be used, so it can be unmapped
			 * once its ref count drops to 0.
			 */
			virtnet_rq_unmap(rq, rq->last_dma, 0);
			rq->last_dma = NULL;
		}
1038
1039 dma->len = alloc_frag->size - sizeof(*dma);
1040
1041 addr = virtqueue_map_single_attrs(vq: rq->vq, ptr: dma + 1,
1042 size: dma->len, dir: DMA_FROM_DEVICE, attrs: 0);
1043 if (virtqueue_map_mapping_error(vq: rq->vq, addr))
1044 return NULL;
1045
1046 dma->addr = addr;
1047 dma->need_sync = virtqueue_map_need_sync(vq: rq->vq, addr);
1048
1049 /* Add a reference to dma to prevent the entire dma from
1050 * being released during error handling. This reference
1051 * will be freed after the pages are no longer used.
1052 */
1053 get_page(page: alloc_frag->page);
1054 dma->ref = 1;
1055 alloc_frag->offset = sizeof(*dma);
1056
1057 rq->last_dma = dma;
1058 }
1059
1060 ++dma->ref;
1061
1062 buf = head + alloc_frag->offset;
1063
1064 get_page(page: alloc_frag->page);
1065 alloc_frag->offset += size;
1066
1067 return buf;
1068}
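/*
 * Sketch of an rx page_frag page under the premapped scheme set up above:
 *
 *	| struct virtnet_rq_dma | buf 0 | buf 1 | ... |
 *
 * The region after the struct is mapped once as a whole; each buffer is handed
 * to the device at dma->addr plus its offset (see virtnet_rq_init_one_sg())
 * and the mapping is only torn down in virtnet_rq_unmap() when dma->ref drops
 * to zero.
 */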
1069
1070static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
1071{
1072 struct virtnet_info *vi = vq->vdev->priv;
1073 struct receive_queue *rq;
1074 int i = vq2rxq(vq);
1075
1076 rq = &vi->rq[i];
1077
1078 if (rq->xsk_pool) {
1079 xsk_buff_free(xdp: (struct xdp_buff *)buf);
1080 return;
1081 }
1082
1083 if (!vi->big_packets || vi->mergeable_rx_bufs)
1084 virtnet_rq_unmap(rq, buf, len: 0);
1085
1086 virtnet_rq_free_buf(vi, rq, buf);
1087}
1088
static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			  bool in_napi)
{
	struct virtnet_sq_free_stats stats = {0};

	virtnet_free_old_xmit(sq, txq, in_napi, &stats);

	/* Avoid the overhead of a stats update when no packets have been
	 * processed, which happens when this is called speculatively from
	 * start_xmit.
	 */
	if (!stats.packets && !stats.napi_packets)
		return;

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
	u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
	u64_stats_update_end(&sq->stats.syncp);
}
1107
1108static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
1109{
1110 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
1111 return false;
1112 else if (q < vi->curr_queue_pairs)
1113 return true;
1114 else
1115 return false;
1116}
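/* e.g. with curr_queue_pairs == 4 and xdp_queue_pairs == 2, queues 2 and 3
 * are the XDP-only raw buffer queues and are the only ones for which the
 * check above returns true.
 */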
1117
1118static bool tx_may_stop(struct virtnet_info *vi,
1119 struct net_device *dev,
1120 struct send_queue *sq)
1121{
1122 int qnum;
1123
1124 qnum = sq - vi->sq;
1125
1126 /* If running out of space, stop queue to avoid getting packets that we
1127 * are then unable to transmit.
1128 * An alternative would be to force queuing layer to requeue the skb by
1129 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
1130 * returned in a normal path of operation: it means that driver is not
1131 * maintaining the TX queue stop/start state properly, and causes
1132 * the stack to do a non-trivial amount of useless work.
1133 * Since most packets only take 1 or 2 ring slots, stopping the queue
1134 * early means 16 slots are typically wasted.
1135 */
1136 if (sq->vq->num_free < MAX_SKB_FRAGS + 2) {
1137 struct netdev_queue *txq = netdev_get_tx_queue(dev, index: qnum);
1138
1139 netif_tx_stop_queue(dev_queue: txq);
1140 u64_stats_update_begin(syncp: &sq->stats.syncp);
1141 u64_stats_inc(p: &sq->stats.stop);
1142 u64_stats_update_end(syncp: &sq->stats.syncp);
1143
1144 return true;
1145 }
1146
1147 return false;
1148}
1149
1150static void check_sq_full_and_disable(struct virtnet_info *vi,
1151 struct net_device *dev,
1152 struct send_queue *sq)
1153{
1154 bool use_napi = sq->napi.weight;
1155 int qnum;
1156
1157 qnum = sq - vi->sq;
1158
1159 if (tx_may_stop(vi, dev, sq)) {
1160 struct netdev_queue *txq = netdev_get_tx_queue(dev, index: qnum);
1161
1162 if (use_napi) {
1163 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
1164 virtqueue_napi_schedule(napi: &sq->napi, vq: sq->vq);
1165 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
1166 /* More just got used, free them then recheck. */
1167 free_old_xmit(sq, txq, in_napi: false);
1168 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
1169 netif_start_subqueue(dev, queue_index: qnum);
1170 u64_stats_update_begin(syncp: &sq->stats.syncp);
1171 u64_stats_inc(p: &sq->stats.wake);
1172 u64_stats_update_end(syncp: &sq->stats.syncp);
1173 virtqueue_disable_cb(vq: sq->vq);
1174 }
1175 }
1176 }
1177}
1178
1179/* Note that @len is the length of received data without virtio header */
1180static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
1181 struct receive_queue *rq, void *buf,
1182 u32 len, bool first_buf)
1183{
1184 struct xdp_buff *xdp;
1185 u32 bufsize;
1186
1187 xdp = (struct xdp_buff *)buf;
1188
	/* In virtnet_add_recvbuf_xsk(), part of XDP_PACKET_HEADROOM is used
	 * for the virtio header and the vhost is asked to fill data starting
	 * at hard_start + XDP_PACKET_HEADROOM - vi->hdr_len.
	 * The first buffer carries the virtio header, so the region left for
	 * frame data is
	 *	xsk_pool_get_rx_frame_size()
	 * Buffers other than the first one carry no virtio header, so their
	 * maximum frame data length is
	 *	xsk_pool_get_rx_frame_size() + vi->hdr_len
	 */
1199 bufsize = xsk_pool_get_rx_frame_size(pool: rq->xsk_pool);
1200 if (!first_buf)
1201 bufsize += vi->hdr_len;
1202
1203 if (unlikely(len > bufsize)) {
1204 pr_debug("%s: rx error: len %u exceeds truesize %u\n",
1205 vi->dev->name, len, bufsize);
1206 DEV_STATS_INC(vi->dev, rx_length_errors);
1207 xsk_buff_free(xdp);
1208 return NULL;
1209 }
1210
1211 if (first_buf) {
1212 xsk_buff_set_size(xdp, size: len);
1213 } else {
1214 xdp_prepare_buff(xdp, hard_start: xdp->data_hard_start,
1215 XDP_PACKET_HEADROOM - vi->hdr_len, data_len: len, meta_valid: 1);
1216 xdp->flags = 0;
1217 }
1218
1219 xsk_buff_dma_sync_for_cpu(xdp);
1220
1221 return xdp;
1222}
1223
1224static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
1225 struct xdp_buff *xdp)
1226{
1227 unsigned int metasize = xdp->data - xdp->data_meta;
1228 struct sk_buff *skb;
1229 unsigned int size;
1230
1231 size = xdp->data_end - xdp->data_hard_start;
1232 skb = napi_alloc_skb(napi: &rq->napi, length: size);
1233 if (unlikely(!skb)) {
1234 xsk_buff_free(xdp);
1235 return NULL;
1236 }
1237
1238 skb_reserve(skb, len: xdp->data_meta - xdp->data_hard_start);
1239
1240 size = xdp->data_end - xdp->data_meta;
1241 memcpy(to: __skb_put(skb, len: size), from: xdp->data_meta, len: size);
1242
1243 if (metasize) {
1244 __skb_pull(skb, len: metasize);
1245 skb_metadata_set(skb, meta_len: metasize);
1246 }
1247
1248 xsk_buff_free(xdp);
1249
1250 return skb;
1251}
1252
1253static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
1254 struct receive_queue *rq, struct xdp_buff *xdp,
1255 unsigned int *xdp_xmit,
1256 struct virtnet_rq_stats *stats)
1257{
1258 struct bpf_prog *prog;
1259 u32 ret;
1260
1261 ret = XDP_PASS;
1262 rcu_read_lock();
1263 prog = rcu_dereference(rq->xdp_prog);
1264 if (prog)
1265 ret = virtnet_xdp_handler(xdp_prog: prog, xdp, dev, xdp_xmit, stats);
1266 rcu_read_unlock();
1267
1268 switch (ret) {
1269 case XDP_PASS:
1270 return xsk_construct_skb(rq, xdp);
1271
1272 case XDP_TX:
1273 case XDP_REDIRECT:
1274 return NULL;
1275
1276 default:
1277 /* drop packet */
1278 xsk_buff_free(xdp);
1279 u64_stats_inc(p: &stats->drops);
1280 return NULL;
1281 }
1282}
1283
1284static void xsk_drop_follow_bufs(struct net_device *dev,
1285 struct receive_queue *rq,
1286 u32 num_buf,
1287 struct virtnet_rq_stats *stats)
1288{
1289 struct xdp_buff *xdp;
1290 u32 len;
1291
1292 while (num_buf-- > 1) {
1293 xdp = virtqueue_get_buf(vq: rq->vq, len: &len);
1294 if (unlikely(!xdp)) {
1295 pr_debug("%s: rx error: %d buffers missing\n",
1296 dev->name, num_buf);
1297 DEV_STATS_INC(dev, rx_length_errors);
1298 break;
1299 }
1300 u64_stats_add(p: &stats->bytes, val: len);
1301 xsk_buff_free(xdp);
1302 }
1303}
1304
1305static int xsk_append_merge_buffer(struct virtnet_info *vi,
1306 struct receive_queue *rq,
1307 struct sk_buff *head_skb,
1308 u32 num_buf,
1309 struct virtio_net_hdr_mrg_rxbuf *hdr,
1310 struct virtnet_rq_stats *stats)
1311{
1312 struct sk_buff *curr_skb;
1313 struct xdp_buff *xdp;
1314 u32 len, truesize;
1315 struct page *page;
1316 void *buf;
1317
1318 curr_skb = head_skb;
1319
1320 while (--num_buf) {
1321 buf = virtqueue_get_buf(vq: rq->vq, len: &len);
1322 if (unlikely(!buf)) {
1323 pr_debug("%s: rx error: %d buffers out of %d missing\n",
1324 vi->dev->name, num_buf,
1325 virtio16_to_cpu(vi->vdev,
1326 hdr->num_buffers));
1327 DEV_STATS_INC(vi->dev, rx_length_errors);
1328 return -EINVAL;
1329 }
1330
1331 u64_stats_add(p: &stats->bytes, val: len);
1332
1333 xdp = buf_to_xdp(vi, rq, buf, len, first_buf: false);
1334 if (!xdp)
1335 goto err;
1336
1337 buf = napi_alloc_frag(fragsz: len);
1338 if (!buf) {
1339 xsk_buff_free(xdp);
1340 goto err;
1341 }
1342
1343 memcpy(to: buf, from: xdp->data, len);
1344
1345 xsk_buff_free(xdp);
1346
1347 page = virt_to_page(buf);
1348
1349 truesize = len;
1350
1351 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
1352 buf, len, truesize);
1353 if (!curr_skb) {
1354 put_page(page);
1355 goto err;
1356 }
1357 }
1358
1359 return 0;
1360
1361err:
1362 xsk_drop_follow_bufs(dev: vi->dev, rq, num_buf, stats);
1363 return -EINVAL;
1364}
1365
1366static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
1367 struct receive_queue *rq, struct xdp_buff *xdp,
1368 unsigned int *xdp_xmit,
1369 struct virtnet_rq_stats *stats)
1370{
1371 struct virtio_net_hdr_mrg_rxbuf *hdr;
1372 struct bpf_prog *prog;
1373 struct sk_buff *skb;
1374 u32 ret, num_buf;
1375
1376 hdr = xdp->data - vi->hdr_len;
1377 num_buf = virtio16_to_cpu(vdev: vi->vdev, val: hdr->num_buffers);
1378
1379 ret = XDP_PASS;
1380 rcu_read_lock();
1381 prog = rcu_dereference(rq->xdp_prog);
1382 /* TODO: support multi buffer. */
1383 if (prog && num_buf == 1)
1384 ret = virtnet_xdp_handler(xdp_prog: prog, xdp, dev, xdp_xmit, stats);
1385 rcu_read_unlock();
1386
1387 switch (ret) {
1388 case XDP_PASS:
1389 skb = xsk_construct_skb(rq, xdp);
1390 if (!skb)
1391 goto drop_bufs;
1392
1393 if (xsk_append_merge_buffer(vi, rq, head_skb: skb, num_buf, hdr, stats)) {
1394 dev_kfree_skb(skb);
1395 goto drop;
1396 }
1397
1398 return skb;
1399
1400 case XDP_TX:
1401 case XDP_REDIRECT:
1402 return NULL;
1403
1404 default:
1405 /* drop packet */
1406 xsk_buff_free(xdp);
1407 }
1408
1409drop_bufs:
1410 xsk_drop_follow_bufs(dev, rq, num_buf, stats);
1411
1412drop:
1413 u64_stats_inc(p: &stats->drops);
1414 return NULL;
1415}
1416
1417static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
1418 void *buf, u32 len,
1419 unsigned int *xdp_xmit,
1420 struct virtnet_rq_stats *stats)
1421{
1422 struct net_device *dev = vi->dev;
1423 struct sk_buff *skb = NULL;
1424 struct xdp_buff *xdp;
1425 u8 flags;
1426
1427 len -= vi->hdr_len;
1428
1429 u64_stats_add(p: &stats->bytes, val: len);
1430
1431 xdp = buf_to_xdp(vi, rq, buf, len, first_buf: true);
1432 if (!xdp)
1433 return;
1434
1435 if (unlikely(len < ETH_HLEN)) {
1436 pr_debug("%s: short packet %i\n", dev->name, len);
1437 DEV_STATS_INC(dev, rx_length_errors);
1438 xsk_buff_free(xdp);
1439 return;
1440 }
1441
1442 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;
1443
1444 if (!vi->mergeable_rx_bufs)
1445 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
1446 else
1447 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats);
1448
1449 if (skb)
1450 virtnet_receive_done(vi, rq, skb, flags);
1451}
1452
1453static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq,
1454 struct xsk_buff_pool *pool, gfp_t gfp)
1455{
1456 struct xdp_buff **xsk_buffs;
1457 dma_addr_t addr;
1458 int err = 0;
1459 u32 len, i;
1460 int num;
1461
1462 xsk_buffs = rq->xsk_buffs;
1463
1464 num = xsk_buff_alloc_batch(pool, xdp: xsk_buffs, max: rq->vq->num_free);
1465 if (!num)
1466 return -ENOMEM;
1467
1468 len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
1469
1470 for (i = 0; i < num; ++i) {
		/* Use part of XDP_PACKET_HEADROOM as the virtnet hdr space.
		 * We assume XDP_PACKET_HEADROOM is larger than vi->hdr_len
		 * (see virtnet_xsk_pool_enable()).
		 */
1475 addr = xsk_buff_xdp_get_dma(xdp: xsk_buffs[i]) - vi->hdr_len;
1476
1477 sg_init_table(rq->sg, 1);
1478 sg_fill_dma(sg: rq->sg, addr, len);
1479
1480 err = virtqueue_add_inbuf_premapped(vq: rq->vq, sg: rq->sg, num: 1,
1481 data: xsk_buffs[i], NULL, gfp);
1482 if (err)
1483 goto err;
1484 }
1485
1486 return num;
1487
1488err:
1489 for (; i < num; ++i)
1490 xsk_buff_free(xdp: xsk_buffs[i]);
1491
1492 return err;
1493}
1494
1495static void *virtnet_xsk_to_ptr(u32 len)
1496{
1497 unsigned long p;
1498
1499 p = len << VIRTIO_XSK_FLAG_OFFSET;
1500
1501 return virtnet_xmit_ptr_pack(ptr: (void *)p, type: VIRTNET_XMIT_TYPE_XSK);
1502}
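/*
 * Example: for a 1500 byte descriptor the token is (1500 << 2) |
 * VIRTNET_XMIT_TYPE_XSK; virtnet_xmit_ptr_unpack() recovers the XSK type and
 * virtnet_ptr_to_xsk_buff_len() recovers 1500 when the buffer completes in
 * __free_old_xmit().
 */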
1503
1504static int virtnet_xsk_xmit_one(struct send_queue *sq,
1505 struct xsk_buff_pool *pool,
1506 struct xdp_desc *desc)
1507{
1508 struct virtnet_info *vi;
1509 dma_addr_t addr;
1510
1511 vi = sq->vq->vdev->priv;
1512
1513 addr = xsk_buff_raw_get_dma(pool, addr: desc->addr);
1514 xsk_buff_raw_dma_sync_for_device(pool, dma: addr, size: desc->len);
1515
1516 sg_init_table(sq->sg, 2);
1517 sg_fill_dma(sg: sq->sg, addr: sq->xsk_hdr_dma_addr, len: vi->hdr_len);
1518 sg_fill_dma(sg: sq->sg + 1, addr, len: desc->len);
1519
1520 return virtqueue_add_outbuf_premapped(vq: sq->vq, sg: sq->sg, num: 2,
1521 data: virtnet_xsk_to_ptr(len: desc->len),
1522 GFP_ATOMIC);
1523}
1524
1525static int virtnet_xsk_xmit_batch(struct send_queue *sq,
1526 struct xsk_buff_pool *pool,
1527 unsigned int budget,
1528 u64 *kicks)
1529{
1530 struct xdp_desc *descs = pool->tx_descs;
1531 bool kick = false;
1532 u32 nb_pkts, i;
1533 int err;
1534
1535 budget = min_t(u32, budget, sq->vq->num_free);
1536
1537 nb_pkts = xsk_tx_peek_release_desc_batch(pool, max: budget);
1538 if (!nb_pkts)
1539 return 0;
1540
1541 for (i = 0; i < nb_pkts; i++) {
1542 err = virtnet_xsk_xmit_one(sq, pool, desc: &descs[i]);
1543 if (unlikely(err)) {
1544 xsk_tx_completed(pool: sq->xsk_pool, nb_entries: nb_pkts - i);
1545 break;
1546 }
1547
1548 kick = true;
1549 }
1550
1551 if (kick && virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq))
1552 (*kicks)++;
1553
1554 return i;
1555}
1556
1557static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
1558 int budget)
1559{
1560 struct virtnet_info *vi = sq->vq->vdev->priv;
1561 struct virtnet_sq_free_stats stats = {};
1562 struct net_device *dev = vi->dev;
1563 u64 kicks = 0;
1564 int sent;
1565
	/* Avoid waking up napi needlessly, so call __free_old_xmit() instead
	 * of free_old_xmit().
	 */
	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);

	if (stats.xsk)
		xsk_tx_completed(sq->xsk_pool, stats.xsk);
1573
1574 sent = virtnet_xsk_xmit_batch(sq, pool, budget, kicks: &kicks);
1575
1576 if (!is_xdp_raw_buffer_queue(vi, q: sq - vi->sq))
1577 check_sq_full_and_disable(vi, dev: vi->dev, sq);
1578
1579 if (sent) {
1580 struct netdev_queue *txq;
1581
1582 txq = netdev_get_tx_queue(dev: vi->dev, index: sq - vi->sq);
1583 txq_trans_cond_update(txq);
1584 }
1585
1586 u64_stats_update_begin(syncp: &sq->stats.syncp);
1587 u64_stats_add(p: &sq->stats.packets, val: stats.packets);
1588 u64_stats_add(p: &sq->stats.bytes, val: stats.bytes);
1589 u64_stats_add(p: &sq->stats.kicks, val: kicks);
1590 u64_stats_add(p: &sq->stats.xdp_tx, val: sent);
1591 u64_stats_update_end(syncp: &sq->stats.syncp);
1592
1593 if (xsk_uses_need_wakeup(pool))
1594 xsk_set_tx_need_wakeup(pool);
1595
1596 return sent;
1597}
1598
1599static void xsk_wakeup(struct send_queue *sq)
1600{
1601 if (napi_if_scheduled_mark_missed(n: &sq->napi))
1602 return;
1603
1604 local_bh_disable();
1605 virtqueue_napi_schedule(napi: &sq->napi, vq: sq->vq);
1606 local_bh_enable();
1607}
1608
1609static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
1610{
1611 struct virtnet_info *vi = netdev_priv(dev);
1612 struct send_queue *sq;
1613
1614 if (!netif_running(dev))
1615 return -ENETDOWN;
1616
1617 if (qid >= vi->curr_queue_pairs)
1618 return -EINVAL;
1619
1620 sq = &vi->sq[qid];
1621
1622 xsk_wakeup(sq);
1623 return 0;
1624}
1625
static void virtnet_xsk_completed(struct send_queue *sq, int num)
{
	xsk_tx_completed(sq->xsk_pool, num);

	/* If this is called from rx poll, start_xmit or xdp xmit, we should
	 * wake up the tx napi to consume the xsk tx queue, because the tx
	 * interrupt may not be triggered.
	 */
	xsk_wakeup(sq);
}
1636
1637static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
1638 struct send_queue *sq,
1639 struct xdp_frame *xdpf)
1640{
1641 struct virtio_net_hdr_mrg_rxbuf *hdr;
1642 struct skb_shared_info *shinfo;
1643 u8 nr_frags = 0;
1644 int err, i;
1645
1646 if (unlikely(xdpf->headroom < vi->hdr_len))
1647 return -EOVERFLOW;
1648
1649 if (unlikely(xdp_frame_has_frags(xdpf))) {
1650 shinfo = xdp_get_shared_info_from_frame(frame: xdpf);
1651 nr_frags = shinfo->nr_frags;
1652 }
1653
	/* The wrapping function virtnet_xdp_xmit() frees up the pending old
	 * buffers, where the position of skb_shared_info is calculated in
	 * xdp_get_frame_len() and xdp_return_frame() based on xdpf->data and
	 * xdpf->headroom. Therefore, the headroom value needs to be updated
	 * synchronously here.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
1666 xdpf->len += vi->hdr_len;
1667
1668 sg_init_table(sq->sg, nr_frags + 1);
1669 sg_set_buf(sg: sq->sg, buf: xdpf->data, buflen: xdpf->len);
1670 for (i = 0; i < nr_frags; i++) {
1671 skb_frag_t *frag = &shinfo->frags[i];
1672
1673 sg_set_page(sg: &sq->sg[i + 1], page: skb_frag_page(frag),
1674 len: skb_frag_size(frag), offset: skb_frag_off(frag));
1675 }
1676
1677 err = virtnet_add_outbuf(sq, num: nr_frags + 1, data: xdpf, type: VIRTNET_XMIT_TYPE_XDP);
1678 if (unlikely(err))
1679 return -ENOSPC; /* Caller handle free/refcnt */
1680
1681 return 0;
1682}
1683
/* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq, 2. deciding on and
 * performing the lock/unlock of txq, 3. making sparse happy. It is difficult
 * for two inline functions to solve all three problems at the same time.
 */
1692#define virtnet_xdp_get_sq(vi) ({ \
1693 int cpu = smp_processor_id(); \
1694 struct netdev_queue *txq; \
1695 typeof(vi) v = (vi); \
1696 unsigned int qp; \
1697 \
1698 if (v->curr_queue_pairs > nr_cpu_ids) { \
1699 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
1700 qp += cpu; \
1701 txq = netdev_get_tx_queue(v->dev, qp); \
1702 __netif_tx_acquire(txq); \
1703 } else { \
1704 qp = cpu % v->curr_queue_pairs; \
1705 txq = netdev_get_tx_queue(v->dev, qp); \
1706 __netif_tx_lock(txq, cpu); \
1707 } \
1708 v->sq + qp; \
1709})
1710
1711#define virtnet_xdp_put_sq(vi, q) { \
1712 struct netdev_queue *txq; \
1713 typeof(vi) v = (vi); \
1714 \
1715 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
1716 if (v->curr_queue_pairs > nr_cpu_ids) \
1717 __netif_tx_release(txq); \
1718 else \
1719 __netif_tx_unlock(txq); \
1720}
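/* Typical pairing, as in virtnet_xdp_xmit() below:
 *
 *	sq = virtnet_xdp_get_sq(vi);
 *	...queue XDP frames on sq...
 *	virtnet_xdp_put_sq(vi, sq);
 *
 * When every CPU has a dedicated XDP queue only the lockless annotation is
 * taken and released; otherwise the real txq lock is used.
 */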
1721
1722static int virtnet_xdp_xmit(struct net_device *dev,
1723 int n, struct xdp_frame **frames, u32 flags)
1724{
1725 struct virtnet_info *vi = netdev_priv(dev);
1726 struct virtnet_sq_free_stats stats = {0};
1727 struct receive_queue *rq = vi->rq;
1728 struct bpf_prog *xdp_prog;
1729 struct send_queue *sq;
1730 int nxmit = 0;
1731 int kicks = 0;
1732 int ret;
1733 int i;
1734
	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates XDP resources have been successfully allocated.
	 */
1738 xdp_prog = rcu_access_pointer(rq->xdp_prog);
1739 if (!xdp_prog)
1740 return -ENXIO;
1741
1742 sq = virtnet_xdp_get_sq(vi);
1743
1744 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
1745 ret = -EINVAL;
1746 goto out;
1747 }
1748
1749 /* Free up any pending old buffers before queueing new ones. */
1750 virtnet_free_old_xmit(sq, txq: netdev_get_tx_queue(dev, index: sq - vi->sq),
1751 in_napi: false, stats: &stats);
1752
1753 for (i = 0; i < n; i++) {
1754 struct xdp_frame *xdpf = frames[i];
1755
1756 if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
1757 break;
1758 nxmit++;
1759 }
1760 ret = nxmit;
1761
1762 if (!is_xdp_raw_buffer_queue(vi, q: sq - vi->sq))
1763 check_sq_full_and_disable(vi, dev, sq);
1764
1765 if (flags & XDP_XMIT_FLUSH) {
1766 if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq))
1767 kicks = 1;
1768 }
1769out:
1770 u64_stats_update_begin(syncp: &sq->stats.syncp);
1771 u64_stats_add(p: &sq->stats.bytes, val: stats.bytes);
1772 u64_stats_add(p: &sq->stats.packets, val: stats.packets);
1773 u64_stats_add(p: &sq->stats.xdp_tx, val: n);
1774 u64_stats_add(p: &sq->stats.xdp_tx_drops, val: n - nxmit);
1775 u64_stats_add(p: &sq->stats.kicks, val: kicks);
1776 u64_stats_update_end(syncp: &sq->stats.syncp);
1777
1778 virtnet_xdp_put_sq(vi, sq);
1779 return ret;
1780}
1781
1782static void put_xdp_frags(struct xdp_buff *xdp)
1783{
1784 struct skb_shared_info *shinfo;
1785 struct page *xdp_page;
1786 int i;
1787
1788 if (xdp_buff_has_frags(xdp)) {
1789 shinfo = xdp_get_shared_info_from_buff(xdp);
1790 for (i = 0; i < shinfo->nr_frags; i++) {
1791 xdp_page = skb_frag_page(frag: &shinfo->frags[i]);
1792 put_page(page: xdp_page);
1793 }
1794 }
1795}
1796
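/* Run the XDP program on @xdp and carry out its verdict. For XDP_TX and
 * XDP_REDIRECT the buffer has already been consumed when this returns, while
 * for XDP_DROP (and errors folded into it) the caller remains responsible for
 * freeing the buffer.
 */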
1797static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
1798 struct net_device *dev,
1799 unsigned int *xdp_xmit,
1800 struct virtnet_rq_stats *stats)
1801{
1802 struct xdp_frame *xdpf;
1803 int err;
1804 u32 act;
1805
1806 act = bpf_prog_run_xdp(prog: xdp_prog, xdp);
1807 u64_stats_inc(p: &stats->xdp_packets);
1808
1809 switch (act) {
1810 case XDP_PASS:
1811 return act;
1812
1813 case XDP_TX:
1814 u64_stats_inc(p: &stats->xdp_tx);
1815 xdpf = xdp_convert_buff_to_frame(xdp);
1816 if (unlikely(!xdpf)) {
1817 netdev_dbg(dev, "convert buff to frame failed for xdp\n");
1818 return XDP_DROP;
1819 }
1820
1821 err = virtnet_xdp_xmit(dev, n: 1, frames: &xdpf, flags: 0);
1822 if (unlikely(!err)) {
1823 xdp_return_frame_rx_napi(xdpf);
1824 } else if (unlikely(err < 0)) {
1825 trace_xdp_exception(dev, xdp: xdp_prog, act);
1826 return XDP_DROP;
1827 }
1828 *xdp_xmit |= VIRTIO_XDP_TX;
1829 return act;
1830
1831 case XDP_REDIRECT:
1832 u64_stats_inc(p: &stats->xdp_redirects);
1833 err = xdp_do_redirect(dev, xdp, prog: xdp_prog);
1834 if (err)
1835 return XDP_DROP;
1836
1837 *xdp_xmit |= VIRTIO_XDP_REDIR;
1838 return act;
1839
1840 default:
1841 bpf_warn_invalid_xdp_action(dev, prog: xdp_prog, act);
1842 fallthrough;
1843 case XDP_ABORTED:
1844 trace_xdp_exception(dev, xdp: xdp_prog, act);
1845 fallthrough;
1846 case XDP_DROP:
1847 return XDP_DROP;
1848 }
1849}
1850
1851static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
1852{
1853 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0;
1854}
1855
1856/* We copy the packet for XDP in the following cases:
1857 *
1858 * 1) Packet is scattered across multiple rx buffers.
1859 * 2) Headroom space is insufficient.
1860 *
1861 * This is inefficient but it's a temporary condition that
1862 * we hit right after XDP is enabled and until queue is refilled
1863 * with large buffers with sufficient headroom - so it should affect
1864 * at most queue size packets.
1865 * Afterwards, the conditions to enable
1866 * XDP should preclude the underlying device from sending packets
1867 * across multiple buffers (num_buf > 1), and we make sure buffers
1868 * have enough headroom.
1869 */
1870static struct page *xdp_linearize_page(struct net_device *dev,
1871 struct receive_queue *rq,
1872 int *num_buf,
1873 struct page *p,
1874 int offset,
1875 int page_off,
1876 unsigned int *len)
1877{
1878 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1879 struct page *page;
1880
1881 if (page_off + *len + tailroom > PAGE_SIZE)
1882 return NULL;
1883
1884 page = alloc_page(GFP_ATOMIC);
1885 if (!page)
1886 return NULL;
1887
1888 memcpy(page_address(page) + page_off, page_address(p) + offset, len: *len);
1889 page_off += *len;
1890
	/* Only mergeable mode can enter this while loop. In small mode,
	 * *num_buf == 1, so the loop body never runs.
	 */
1894 while (--*num_buf) {
1895 unsigned int buflen;
1896 void *buf;
1897 void *ctx;
1898 int off;
1899
1900 buf = virtnet_rq_get_buf(rq, len: &buflen, ctx: &ctx);
1901 if (unlikely(!buf))
1902 goto err_buf;
1903
1904 p = virt_to_head_page(x: buf);
1905 off = buf - page_address(p);
1906
1907 if (check_mergeable_len(dev, mrg_ctx: ctx, len: buflen)) {
1908 put_page(page: p);
1909 goto err_buf;
1910 }
1911
		/* guard against a misconfigured or uncooperative backend that
		 * is sending packets larger than the MTU.
		 */
1915 if ((page_off + buflen + tailroom) > PAGE_SIZE) {
1916 put_page(page: p);
1917 goto err_buf;
1918 }
1919
1920 memcpy(page_address(page) + page_off,
1921 page_address(p) + off, len: buflen);
1922 page_off += buflen;
1923 put_page(page: p);
1924 }
1925
1926 /* Headroom does not contribute to packet length */
1927 *len = page_off - XDP_PACKET_HEADROOM;
1928 return page;
1929err_buf:
1930 __free_pages(page, order: 0);
1931 return NULL;
1932}
1933
1934static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
1935 unsigned int xdp_headroom,
1936 void *buf,
1937 unsigned int len)
1938{
1939 unsigned int header_offset;
1940 unsigned int headroom;
1941 unsigned int buflen;
1942 struct sk_buff *skb;
1943
1944 header_offset = VIRTNET_RX_PAD + xdp_headroom;
1945 headroom = vi->hdr_len + header_offset;
1946 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1947 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1948
1949 skb = virtnet_build_skb(buf, buflen, headroom, len);
1950 if (unlikely(!skb))
1951 return NULL;
1952
1953 buf += header_offset;
1954 memcpy(to: skb_vnet_common_hdr(skb), from: buf, len: vi->hdr_len);
1955
1956 return skb;
1957}
1958
1959static struct sk_buff *receive_small_xdp(struct net_device *dev,
1960 struct virtnet_info *vi,
1961 struct receive_queue *rq,
1962 struct bpf_prog *xdp_prog,
1963 void *buf,
1964 unsigned int xdp_headroom,
1965 unsigned int len,
1966 unsigned int *xdp_xmit,
1967 struct virtnet_rq_stats *stats)
1968{
1969 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
1970 unsigned int headroom = vi->hdr_len + header_offset;
1971 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
1972 struct page *page = virt_to_head_page(x: buf);
1973 struct page *xdp_page;
1974 unsigned int buflen;
1975 struct xdp_buff xdp;
1976 struct sk_buff *skb;
1977 unsigned int metasize = 0;
1978 u32 act;
1979
1980 if (unlikely(hdr->hdr.gso_type))
1981 goto err_xdp;
1982
1983 /* Partially checksummed packets must be dropped. */
1984 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
1985 goto err_xdp;
1986
1987 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1988 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1989
1990 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
1991 int offset = buf - page_address(page) + header_offset;
1992 unsigned int tlen = len + vi->hdr_len;
1993 int num_buf = 1;
1994
1995 xdp_headroom = virtnet_get_headroom(vi);
1996 header_offset = VIRTNET_RX_PAD + xdp_headroom;
1997 headroom = vi->hdr_len + header_offset;
1998 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1999 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		xdp_page = xdp_linearize_page(dev, rq, &num_buf, page,
					      offset, header_offset,
					      &tlen);
2003 if (!xdp_page)
2004 goto err_xdp;
2005
2006 buf = page_address(xdp_page);
2007 put_page(page);
2008 page = xdp_page;
2009 }
2010
	xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
	xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
			 xdp_headroom, len, true);

	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
2016
2017 switch (act) {
2018 case XDP_PASS:
2019 /* Recalculate length in case bpf program changed it */
2020 len = xdp.data_end - xdp.data;
2021 metasize = xdp.data - xdp.data_meta;
2022 break;
2023
2024 case XDP_TX:
2025 case XDP_REDIRECT:
2026 goto xdp_xmit;
2027
2028 default:
2029 goto err_xdp;
2030 }
2031
	skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
	if (unlikely(!skb))
		goto err;

	if (metasize)
		skb_metadata_set(skb, metasize);

	return skb;

err_xdp:
	u64_stats_inc(&stats->xdp_drops);
err:
	u64_stats_inc(&stats->drops);
2045 put_page(page);
2046xdp_xmit:
2047 return NULL;
2048}
2049
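/* Small-buffer receive path: each buffer holds the virtio-net header,
 * optional XDP headroom and at most GOOD_PACKET_LEN bytes of data, so a
 * packet never spans more than one buffer here.
 */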
2050static struct sk_buff *receive_small(struct net_device *dev,
2051 struct virtnet_info *vi,
2052 struct receive_queue *rq,
2053 void *buf, void *ctx,
2054 unsigned int len,
2055 unsigned int *xdp_xmit,
2056 struct virtnet_rq_stats *stats)
2057{
2058 unsigned int xdp_headroom = (unsigned long)ctx;
	struct page *page = virt_to_head_page(buf);
2060 struct sk_buff *skb;
2061
2062 /* We passed the address of virtnet header to virtio-core,
2063 * so truncate the padding.
2064 */
2065 buf -= VIRTNET_RX_PAD + xdp_headroom;
2066
2067 len -= vi->hdr_len;
	u64_stats_add(&stats->bytes, len);
2069
2070 if (unlikely(len > GOOD_PACKET_LEN)) {
2071 pr_debug("%s: rx error: len %u exceeds max size %d\n",
2072 dev->name, len, GOOD_PACKET_LEN);
2073 DEV_STATS_INC(dev, rx_length_errors);
2074 goto err;
2075 }
2076
2077 if (unlikely(vi->xdp_enabled)) {
2078 struct bpf_prog *xdp_prog;
2079
2080 rcu_read_lock();
2081 xdp_prog = rcu_dereference(rq->xdp_prog);
2082 if (xdp_prog) {
2083 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
2084 xdp_headroom, len, xdp_xmit,
2085 stats);
2086 rcu_read_unlock();
2087 return skb;
2088 }
2089 rcu_read_unlock();
2090 }
2091
2092 skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
2093 if (likely(skb))
2094 return skb;
2095
2096err:
	u64_stats_inc(&stats->drops);
2098 put_page(page);
2099 return NULL;
2100}
2101
2102static struct sk_buff *receive_big(struct net_device *dev,
2103 struct virtnet_info *vi,
2104 struct receive_queue *rq,
2105 void *buf,
2106 unsigned int len,
2107 struct virtnet_rq_stats *stats)
2108{
2109 struct page *page = buf;
	struct sk_buff *skb =
		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);

	u64_stats_add(&stats->bytes, len - vi->hdr_len);
2114 if (unlikely(!skb))
2115 goto err;
2116
2117 return skb;
2118
2119err:
	u64_stats_inc(&stats->drops);
2121 give_pages(rq, page);
2122 return NULL;
2123}
2124
2125static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
2126 struct net_device *dev,
2127 struct virtnet_rq_stats *stats)
2128{
2129 struct page *page;
2130 void *buf;
2131 int len;
2132
2133 while (num_buf-- > 1) {
		buf = virtnet_rq_get_buf(rq, &len, NULL);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);
			break;
		}
		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
2143 put_page(page);
2144 }
2145}
2146
/* Why not use xdp_build_skb_from_frame()?
 * The XDP core assumes that xdp frags are PAGE_SIZE in length, while in
 * virtio-net there are two points that do not match its requirements:
 * 1. The size of the prefilled buffer is not fixed before xdp is set.
 * 2. xdp_build_skb_from_frame() does extra checks that we don't need,
 *    like eth_type_trans() (which virtio-net does in receive_buf()).
 */
2154static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
2155 struct virtnet_info *vi,
2156 struct xdp_buff *xdp,
2157 unsigned int xdp_frags_truesz)
2158{
2159 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
2160 unsigned int headroom, data_len;
2161 struct sk_buff *skb;
2162 int metasize;
2163 u8 nr_frags;
2164
2165 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
2166 pr_debug("Error building skb as missing reserved tailroom for xdp");
2167 return NULL;
2168 }
2169
2170 if (unlikely(xdp_buff_has_frags(xdp)))
2171 nr_frags = sinfo->nr_frags;
2172
	skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
	if (unlikely(!skb))
		return NULL;

	headroom = xdp->data - xdp->data_hard_start;
	data_len = xdp->data_end - xdp->data;
	skb_reserve(skb, headroom);
	__skb_put(skb, data_len);

	metasize = xdp->data - xdp->data_meta;
	metasize = metasize > 0 ? metasize : 0;
	if (metasize)
		skb_metadata_set(skb, metasize);

	if (unlikely(xdp_buff_has_frags(xdp)))
		xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
					  xdp_frags_truesz,
					  xdp_buff_get_skb_flags(xdp));
2191
2192 return skb;
2193}
2194
2195/* TODO: build xdp in big mode */
2196static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
2197 struct virtnet_info *vi,
2198 struct receive_queue *rq,
2199 struct xdp_buff *xdp,
2200 void *buf,
2201 unsigned int len,
2202 unsigned int frame_sz,
2203 int *num_buf,
2204 unsigned int *xdp_frags_truesize,
2205 struct virtnet_rq_stats *stats)
2206{
2207 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
2208 struct skb_shared_info *shinfo;
2209 unsigned int xdp_frags_truesz = 0;
2210 unsigned int truesize;
2211 struct page *page;
2212 skb_frag_t *frag;
2213 int offset;
2214 void *ctx;
2215
	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
	xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM,
			 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);
2219
2220 if (!*num_buf)
2221 return 0;
2222
2223 if (*num_buf > 1) {
		/* If we want to build a multi-buffer xdp_buff, we need
		 * to set the XDP_FLAGS_HAS_FRAGS bit in the xdp_buff
		 * flags.
		 */
2228 if (!xdp_buff_has_frags(xdp))
2229 xdp_buff_set_frags_flag(xdp);
2230
2231 shinfo = xdp_get_shared_info_from_buff(xdp);
2232 shinfo->nr_frags = 0;
2233 shinfo->xdp_frags_size = 0;
2234 }
2235
2236 if (*num_buf > MAX_SKB_FRAGS + 1)
2237 return -EINVAL;
2238
2239 while (--*num_buf > 0) {
		buf = virtnet_rq_get_buf(rq, &len, &ctx);
2241 if (unlikely(!buf)) {
2242 pr_debug("%s: rx error: %d buffers out of %d missing\n",
2243 dev->name, *num_buf,
2244 virtio16_to_cpu(vi->vdev, hdr->num_buffers));
2245 DEV_STATS_INC(dev, rx_length_errors);
2246 goto err;
2247 }
2248
		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
		offset = buf - page_address(page);

		if (check_mergeable_len(dev, ctx, len)) {
			put_page(page);
			goto err;
		}

		truesize = mergeable_ctx_to_truesize(ctx);
		xdp_frags_truesz += truesize;

		frag = &shinfo->frags[shinfo->nr_frags++];
		skb_frag_fill_page_desc(frag, page, offset, len);
2263 if (page_is_pfmemalloc(page))
2264 xdp_buff_set_frag_pfmemalloc(xdp);
2265
2266 shinfo->xdp_frags_size += len;
2267 }
2268
2269 *xdp_frags_truesize = xdp_frags_truesz;
2270 return 0;
2271
2272err:
2273 put_xdp_frags(xdp);
2274 return -EINVAL;
2275}
2276
2277static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
2278 struct receive_queue *rq,
2279 struct bpf_prog *xdp_prog,
2280 void *ctx,
2281 unsigned int *frame_sz,
2282 int *num_buf,
2283 struct page **page,
2284 int offset,
2285 unsigned int *len,
2286 struct virtio_net_hdr_mrg_rxbuf *hdr)
2287{
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
2290 struct page *xdp_page;
2291 unsigned int xdp_room;
2292
2293 /* Transient failure which in theory could occur if
2294 * in-flight packets from before XDP was enabled reach
2295 * the receive path after XDP is loaded.
2296 */
2297 if (unlikely(hdr->hdr.gso_type))
2298 return NULL;
2299
2300 /* Partially checksummed packets must be dropped. */
2301 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
2302 return NULL;
2303
	/* The XDP core assumes the frag size is PAGE_SIZE, but buffers
	 * allocated with headroom may add a hole to the truesize, which
	 * would make their length exceed PAGE_SIZE. So the hole
	 * mechanism is disabled for XDP. See add_recvbuf_mergeable().
	 */
2309 *frame_sz = truesize;
2310
2311 if (likely(headroom >= virtnet_get_headroom(vi) &&
2312 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
2313 return page_address(*page) + offset;
2314 }
2315
	/* This happens when the headroom is not enough because the
	 * buffer was prefilled before XDP was set.
	 * This should only happen for the first several packets.
	 * In fact, vq reset could be used here to help us clean up
	 * the prefilled buffers, but many existing devices do not
	 * support it, and we don't want to bother users who are
	 * using XDP normally.
	 */
2324 if (!xdp_prog->aux->xdp_has_frags) {
2325 /* linearize data for XDP */
		xdp_page = xdp_linearize_page(vi->dev, rq, num_buf,
					      *page, offset,
					      XDP_PACKET_HEADROOM,
					      len);
2330 if (!xdp_page)
2331 return NULL;
2332 } else {
2333 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
2334 sizeof(struct skb_shared_info));
2335 if (*len + xdp_room > PAGE_SIZE)
2336 return NULL;
2337
2338 xdp_page = alloc_page(GFP_ATOMIC);
2339 if (!xdp_page)
2340 return NULL;
2341
		memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM,
		       page_address(*page) + offset, *len);
2344 }
2345
2346 *frame_sz = PAGE_SIZE;
2347
	put_page(*page);
2349
2350 *page = xdp_page;
2351
2352 return page_address(*page) + XDP_PACKET_HEADROOM;
2353}
2354
2355static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
2356 struct virtnet_info *vi,
2357 struct receive_queue *rq,
2358 struct bpf_prog *xdp_prog,
2359 void *buf,
2360 void *ctx,
2361 unsigned int len,
2362 unsigned int *xdp_xmit,
2363 struct virtnet_rq_stats *stats)
2364{
2365 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
2368 int offset = buf - page_address(page);
2369 unsigned int xdp_frags_truesz = 0;
2370 struct sk_buff *head_skb;
2371 unsigned int frame_sz;
2372 struct xdp_buff xdp;
2373 void *data;
2374 u32 act;
2375 int err;
2376
	data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page,
				     offset, &len, hdr);
2379 if (unlikely(!data))
2380 goto err_xdp;
2381
	err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
					 &num_buf, &xdp_frags_truesz, stats);
2384 if (unlikely(err))
2385 goto err_xdp;
2386
	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
2388
2389 switch (act) {
2390 case XDP_PASS:
		head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
2392 if (unlikely(!head_skb))
2393 break;
2394 return head_skb;
2395
2396 case XDP_TX:
2397 case XDP_REDIRECT:
2398 return NULL;
2399
2400 default:
2401 break;
2402 }
2403
	put_xdp_frags(&xdp);
2405
2406err_xdp:
2407 put_page(page);
2408 mergeable_buf_free(rq, num_buf, dev, stats);
2409
	u64_stats_inc(&stats->xdp_drops);
	u64_stats_inc(&stats->drops);
2412 return NULL;
2413}
2414
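/* Attach one mergeable receive buffer to the skb being assembled as a page
 * frag. When the current skb has no frag slots left, a zero-length skb is
 * linked via frag_list and filling continues there. Returns the skb that
 * now accepts frags, or NULL if allocation fails.
 */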
2415static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
2416 struct sk_buff *curr_skb,
2417 struct page *page, void *buf,
2418 int len, int truesize)
2419{
2420 int num_skb_frags;
2421 int offset;
2422
2423 num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
2424 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
2426
2427 if (unlikely(!nskb))
2428 return NULL;
2429
2430 if (curr_skb == head_skb)
2431 skb_shinfo(curr_skb)->frag_list = nskb;
2432 else
2433 curr_skb->next = nskb;
2434 curr_skb = nskb;
2435 head_skb->truesize += nskb->truesize;
2436 num_skb_frags = 0;
2437 }
2438
2439 if (curr_skb != head_skb) {
2440 head_skb->data_len += len;
2441 head_skb->len += len;
2442 head_skb->truesize += truesize;
2443 }
2444
2445 offset = buf - page_address(page);
	if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
		put_page(page);
		skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
				     len, truesize);
	} else {
		skb_add_rx_frag(curr_skb, num_skb_frags, page,
				offset, len, truesize);
2453 }
2454
2455 return curr_skb;
2456}
2457
2458static struct sk_buff *receive_mergeable(struct net_device *dev,
2459 struct virtnet_info *vi,
2460 struct receive_queue *rq,
2461 void *buf,
2462 void *ctx,
2463 unsigned int len,
2464 unsigned int *xdp_xmit,
2465 struct virtnet_rq_stats *stats)
2466{
2467 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
	int offset = buf - page_address(page);
	struct sk_buff *head_skb, *curr_skb;
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
2474
2475 head_skb = NULL;
	u64_stats_add(&stats->bytes, len - vi->hdr_len);
2477
	if (check_mergeable_len(dev, ctx, len))
2479 goto err_skb;
2480
2481 if (unlikely(vi->xdp_enabled)) {
2482 struct bpf_prog *xdp_prog;
2483
2484 rcu_read_lock();
2485 xdp_prog = rcu_dereference(rq->xdp_prog);
2486 if (xdp_prog) {
2487 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
2488 len, xdp_xmit, stats);
2489 rcu_read_unlock();
2490 return head_skb;
2491 }
2492 rcu_read_unlock();
2493 }
2494
2495 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
2496 curr_skb = head_skb;
2497
2498 if (unlikely(!curr_skb))
2499 goto err_skb;
2500 while (--num_buf) {
		buf = virtnet_rq_get_buf(rq, &len, &ctx);
2502 if (unlikely(!buf)) {
2503 pr_debug("%s: rx error: %d buffers out of %d missing\n",
2504 dev->name, num_buf,
2505 virtio16_to_cpu(vi->vdev,
2506 hdr->num_buffers));
2507 DEV_STATS_INC(dev, rx_length_errors);
2508 goto err_buf;
2509 }
2510
		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);

		if (check_mergeable_len(dev, ctx, len))
			goto err_skb;

		truesize = mergeable_ctx_to_truesize(ctx);
2518 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
2519 buf, len, truesize);
2520 if (!curr_skb)
2521 goto err_skb;
2522 }
2523
	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
2525 return head_skb;
2526
2527err_skb:
2528 put_page(page);
2529 mergeable_buf_free(rq, num_buf, dev, stats);
2530
2531err_buf:
	u64_stats_inc(&stats->drops);
2533 dev_kfree_skb(head_skb);
2534 return NULL;
2535}
2536
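/* Translate the device-reported hash_report type into the kernel's
 * PKT_HASH_TYPE_* value and record the reported hash on the skb.
 */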
2537static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash,
2538 struct sk_buff *skb)
2539{
2540 enum pkt_hash_types rss_hash_type;
2541
2542 if (!hdr_hash || !skb)
2543 return;
2544
2545 switch (__le16_to_cpu(hdr_hash->hash_report)) {
2546 case VIRTIO_NET_HASH_REPORT_TCPv4:
2547 case VIRTIO_NET_HASH_REPORT_UDPv4:
2548 case VIRTIO_NET_HASH_REPORT_TCPv6:
2549 case VIRTIO_NET_HASH_REPORT_UDPv6:
2550 case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
2551 case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
2552 rss_hash_type = PKT_HASH_TYPE_L4;
2553 break;
2554 case VIRTIO_NET_HASH_REPORT_IPv4:
2555 case VIRTIO_NET_HASH_REPORT_IPv6:
2556 case VIRTIO_NET_HASH_REPORT_IPv6_EX:
2557 rss_hash_type = PKT_HASH_TYPE_L3;
2558 break;
2559 case VIRTIO_NET_HASH_REPORT_NONE:
2560 default:
2561 rss_hash_type = PKT_HASH_TYPE_NONE;
2562 }
	skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type);
2564}
2565
2566static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
2567 struct sk_buff *skb, u8 flags)
2568{
2569 struct virtio_net_common_hdr *hdr;
2570 struct net_device *dev = vi->dev;
2571
2572 hdr = skb_vnet_common_hdr(skb);
2573 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
		virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);
2575
2576 hdr->hdr.flags = flags;
	if (virtio_net_handle_csum_offload(skb, &hdr->hdr, vi->rx_tnl_csum)) {
2578 net_warn_ratelimited("%s: bad csum: flags: %x, gso_type: %x rx_tnl_csum %d\n",
2579 dev->name, hdr->hdr.flags,
2580 hdr->hdr.gso_type, vi->rx_tnl_csum);
2581 goto frame_err;
2582 }
2583
	if (virtio_net_hdr_tnl_to_skb(skb, &hdr->tnl_hdr, vi->rx_tnl,
				      vi->rx_tnl_csum,
				      virtio_is_little_endian(vi->vdev))) {
2587 net_warn_ratelimited("%s: bad gso: type: %x, size: %u, flags %x tunnel %d tnl csum %d\n",
2588 dev->name, hdr->hdr.gso_type,
2589 hdr->hdr.gso_size, hdr->hdr.flags,
2590 vi->rx_tnl, vi->rx_tnl_csum);
2591 goto frame_err;
2592 }
2593
	skb_record_rx_queue(skb, vq2rxq(rq->vq));
2595 skb->protocol = eth_type_trans(skb, dev);
2596 pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
2597 ntohs(skb->protocol), skb->len, skb->pkt_type);
2598
	napi_gro_receive(&rq->napi, skb);
2600 return;
2601
2602frame_err:
2603 DEV_STATS_INC(dev, rx_frame_errors);
2604 dev_kfree_skb(skb);
2605}
2606
2607static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
2608 void *buf, unsigned int len, void **ctx,
2609 unsigned int *xdp_xmit,
2610 struct virtnet_rq_stats *stats)
2611{
2612 struct net_device *dev = vi->dev;
2613 struct sk_buff *skb;
2614 u8 flags;
2615
2616 if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
2617 pr_debug("%s: short packet %i\n", dev->name, len);
2618 DEV_STATS_INC(dev, rx_length_errors);
2619 virtnet_rq_free_buf(vi, rq, buf);
2620 return;
2621 }
2622
2623 /* 1. Save the flags early, as the XDP program might overwrite them.
2624 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID
2625 * stay valid after XDP processing.
2626 * 2. XDP doesn't work with partially checksummed packets (refer to
2627 * virtnet_xdp_set()), so packets marked as
2628 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing.
2629 */
2630 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags;
2631
2632 if (vi->mergeable_rx_bufs)
2633 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
2634 stats);
2635 else if (vi->big_packets)
2636 skb = receive_big(dev, vi, rq, buf, len, stats);
2637 else
2638 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
2639
2640 if (unlikely(!skb))
2641 return;
2642
2643 virtnet_receive_done(vi, rq, skb, flags);
2644}
2645
2646/* Unlike mergeable buffers, all buffers are allocated to the
2647 * same size, except for the headroom. For this reason we do
2648 * not need to use mergeable_len_to_ctx here - it is enough
2649 * to store the headroom as the context ignoring the truesize.
2650 */
2651static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
2652 gfp_t gfp)
2653{
2654 char *buf;
2655 unsigned int xdp_headroom = virtnet_get_headroom(vi);
2656 void *ctx = (void *)(unsigned long)xdp_headroom;
2657 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
2658 int err;
2659
2660 len = SKB_DATA_ALIGN(len) +
2661 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2662
2663 if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp)))
2664 return -ENOMEM;
2665
	buf = virtnet_rq_alloc(rq, len, gfp);
2667 if (unlikely(!buf))
2668 return -ENOMEM;
2669
2670 buf += VIRTNET_RX_PAD + xdp_headroom;
2671
	virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN);

	err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
2678 }
2679
2680 return err;
2681}
2682
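/* Big-packets mode: post a chain of whole pages. rq->sg[0] carries the
 * virtio-net header, rq->sg[1] the remainder of that first page, and every
 * following sg entry one extra page; the pages are linked through
 * page->private so the chain can be given back as a unit.
 */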
2683static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
2684 gfp_t gfp)
2685{
2686 struct page *first, *list = NULL;
2687 char *p;
2688 int i, err, offset;
2689
2690 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2);
2691
2692 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */
2693 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) {
		first = get_a_page(rq, gfp);
		if (!first) {
			if (list)
				give_pages(rq, list);
			return -ENOMEM;
		}
		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
2701
2702 /* chain new page in list head to match sg */
2703 first->private = (unsigned long)list;
2704 list = first;
2705 }
2706
	first = get_a_page(rq, gfp);
	if (!first) {
		give_pages(rq, list);
2710 return -ENOMEM;
2711 }
2712 p = page_address(first);
2713
	/* rq->sg[0], rq->sg[1] share the same page */
	/* a separate rq->sg[0] for header - required in case !any_header_sg */
	sg_set_buf(&rq->sg[0], p, vi->hdr_len);
2717
2718 /* rq->sg[1] for data packet, from offset */
2719 offset = sizeof(struct padded_vnet_hdr);
	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
2721
2722 /* chain first in list head */
2723 first->private = (unsigned long)list;
	err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2,
				  first, gfp);
	if (err < 0)
		give_pages(rq, first);
2728
2729 return err;
2730}
2731
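/* Pick the buffer length for mergeable receive buffers: with XDP headroom
 * reserved (room != 0) use a whole page minus that room, otherwise size the
 * buffer from the EWMA of recent packet lengths, clamped to
 * [min_buf_len, PAGE_SIZE - hdr_len] and rounded up to the cache line size.
 */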
2732static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
2733 struct ewma_pkt_len *avg_pkt_len,
2734 unsigned int room)
2735{
2736 struct virtnet_info *vi = rq->vq->vdev->priv;
2737 const size_t hdr_len = vi->hdr_len;
2738 unsigned int len;
2739
2740 if (room)
2741 return PAGE_SIZE - room;
2742
2743 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
2744 rq->min_buf_len, PAGE_SIZE - hdr_len);
2745
2746 return ALIGN(len, L1_CACHE_BYTES);
2747}
2748
2749static int add_recvbuf_mergeable(struct virtnet_info *vi,
2750 struct receive_queue *rq, gfp_t gfp)
2751{
2752 struct page_frag *alloc_frag = &rq->alloc_frag;
2753 unsigned int headroom = virtnet_get_headroom(vi);
2754 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
2755 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
2756 unsigned int len, hole;
2757 void *ctx;
2758 char *buf;
2759 int err;
2760
	/* Extra tailroom is needed to satisfy XDP's assumption. This
	 * means rx frag coalescing won't work, but since we have
	 * disabled GSO for XDP it should not be a big issue.
	 */
	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
2766
2767 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
2768 return -ENOMEM;
2769
2770 if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size)
2771 len -= sizeof(struct virtnet_rq_dma);
2772
	buf = virtnet_rq_alloc(rq, len + room, gfp);
2774 if (unlikely(!buf))
2775 return -ENOMEM;
2776
2777 buf += headroom; /* advance address leaving hole at front of pkt */
2778 hole = alloc_frag->size - alloc_frag->offset;
2779 if (hole < len + room) {
2780 /* To avoid internal fragmentation, if there is very likely not
2781 * enough space for another buffer, add the remaining space to
2782 * the current buffer.
2783 * XDP core assumes that frame_size of xdp_buff and the length
2784 * of the frag are PAGE_SIZE, so we disable the hole mechanism.
2785 */
2786 if (!headroom)
2787 len += hole;
2788 alloc_frag->offset += hole;
2789 }
2790
	virtnet_rq_init_one_sg(rq, buf, len);

	ctx = mergeable_len_to_ctx(len + room, headroom);
	err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
2798 }
2799
2800 return err;
2801}
2802
2803/*
2804 * Returns false if we couldn't fill entirely (OOM).
2805 *
2806 * Normally run in the receive path, but can also be run from ndo_open
2807 * before we're receiving packets, or from refill_work which is
2808 * careful to disable receiving (using napi_disable).
2809 */
2810static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
2811 gfp_t gfp)
2812{
2813 int err;
2814
2815 if (rq->xsk_pool) {
		err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp);
2817 goto kick;
2818 }
2819
2820 do {
2821 if (vi->mergeable_rx_bufs)
2822 err = add_recvbuf_mergeable(vi, rq, gfp);
2823 else if (vi->big_packets)
2824 err = add_recvbuf_big(vi, rq, gfp);
2825 else
2826 err = add_recvbuf_small(vi, rq, gfp);
2827
2828 if (err)
2829 break;
2830 } while (rq->vq->num_free);
2831
2832kick:
	if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
		unsigned long flags;

		flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
		u64_stats_inc(&rq->stats.kicks);
		u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
2839 }
2840
2841 return err != -ENOMEM;
2842}
2843
2844static void skb_recv_done(struct virtqueue *rvq)
2845{
2846 struct virtnet_info *vi = rvq->vdev->priv;
	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];

	rq->calls++;
	virtqueue_napi_schedule(&rq->napi, rvq);
2851}
2852
2853static void virtnet_napi_do_enable(struct virtqueue *vq,
2854 struct napi_struct *napi)
2855{
	napi_enable(napi);

	/* If all buffers were filled by the other side before we enabled
	 * NAPI, we won't get another interrupt, so process any outstanding
	 * packets now. Calling local_bh_enable afterwards triggers softIRQ
	 * processing.
	 */
2862 local_bh_disable();
2863 virtqueue_napi_schedule(napi, vq);
2864 local_bh_enable();
2865}
2866
2867static void virtnet_napi_enable(struct receive_queue *rq)
2868{
2869 struct virtnet_info *vi = rq->vq->vdev->priv;
	int qidx = vq2rxq(rq->vq);

	virtnet_napi_do_enable(rq->vq, &rq->napi);
	netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
2874}
2875
2876static void virtnet_napi_tx_enable(struct send_queue *sq)
2877{
2878 struct virtnet_info *vi = sq->vq->vdev->priv;
2879 struct napi_struct *napi = &sq->napi;
	int qidx = vq2txq(sq->vq);
2881
2882 if (!napi->weight)
2883 return;
2884
2885 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
2886 * enable the feature if this is likely affine with the transmit path.
2887 */
2888 if (!vi->affinity_hint_set) {
2889 napi->weight = 0;
2890 return;
2891 }
2892
	virtnet_napi_do_enable(sq->vq, napi);
	netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
2895}
2896
2897static void virtnet_napi_tx_disable(struct send_queue *sq)
2898{
2899 struct virtnet_info *vi = sq->vq->vdev->priv;
2900 struct napi_struct *napi = &sq->napi;
	int qidx = vq2txq(sq->vq);

	if (napi->weight) {
		netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
		napi_disable(napi);
2906 }
2907}
2908
2909static void virtnet_napi_disable(struct receive_queue *rq)
2910{
2911 struct virtnet_info *vi = rq->vq->vdev->priv;
2912 struct napi_struct *napi = &rq->napi;
	int qidx = vq2rxq(rq->vq);

	netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
	napi_disable(napi);
2917}
2918
2919static void refill_work(struct work_struct *work)
2920{
2921 struct virtnet_info *vi =
2922 container_of(work, struct virtnet_info, refill.work);
2923 bool still_empty;
2924 int i;
2925
2926 for (i = 0; i < vi->curr_queue_pairs; i++) {
2927 struct receive_queue *rq = &vi->rq[i];
2928
2929 /*
2930 * When queue API support is added in the future and the call
2931 * below becomes napi_disable_locked, this driver will need to
2932 * be refactored.
2933 *
2934 * One possible solution would be to:
2935 * - cancel refill_work with cancel_delayed_work (note:
2936 * non-sync)
2937 * - cancel refill_work with cancel_delayed_work_sync in
2938 * virtnet_remove after the netdev is unregistered
2939 * - wrap all of the work in a lock (perhaps the netdev
2940 * instance lock)
2941 * - check netif_running() and return early to avoid a race
2942 */
		napi_disable(&rq->napi);
		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
		virtnet_napi_do_enable(rq->vq, &rq->napi);
2946
		/* In theory, this can happen: if we don't get any buffers
		 * in, we will *never* try to fill again.
		 */
		if (still_empty)
			schedule_delayed_work(&vi->refill, HZ/2);
2952 }
2953}
2954
2955static int virtnet_receive_xsk_bufs(struct virtnet_info *vi,
2956 struct receive_queue *rq,
2957 int budget,
2958 unsigned int *xdp_xmit,
2959 struct virtnet_rq_stats *stats)
2960{
2961 unsigned int len;
2962 int packets = 0;
2963 void *buf;
2964
2965 while (packets < budget) {
		buf = virtqueue_get_buf(rq->vq, &len);
2967 if (!buf)
2968 break;
2969
2970 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats);
2971 packets++;
2972 }
2973
2974 return packets;
2975}
2976
2977static int virtnet_receive_packets(struct virtnet_info *vi,
2978 struct receive_queue *rq,
2979 int budget,
2980 unsigned int *xdp_xmit,
2981 struct virtnet_rq_stats *stats)
2982{
2983 unsigned int len;
2984 int packets = 0;
2985 void *buf;
2986
2987 if (!vi->big_packets || vi->mergeable_rx_bufs) {
2988 void *ctx;
2989 while (packets < budget &&
		       (buf = virtnet_rq_get_buf(rq, &len, &ctx))) {
			receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats);
2992 packets++;
2993 }
2994 } else {
2995 while (packets < budget &&
		       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
2997 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats);
2998 packets++;
2999 }
3000 }
3001
3002 return packets;
3003}
3004
3005static int virtnet_receive(struct receive_queue *rq, int budget,
3006 unsigned int *xdp_xmit)
3007{
3008 struct virtnet_info *vi = rq->vq->vdev->priv;
3009 struct virtnet_rq_stats stats = {};
3010 int i, packets;
3011
3012 if (rq->xsk_pool)
		packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats);
	else
		packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);
3016
3017 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
3018 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
			spin_lock(&vi->refill_lock);
			if (vi->refill_enabled)
				schedule_delayed_work(&vi->refill, 0);
			spin_unlock(&vi->refill_lock);
3023 }
3024 }
3025
3026 u64_stats_set(p: &stats.packets, val: packets);
3027 u64_stats_update_begin(syncp: &rq->stats.syncp);
3028 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) {
3029 size_t offset = virtnet_rq_stats_desc[i].offset;
3030 u64_stats_t *item, *src;
3031
3032 item = (u64_stats_t *)((u8 *)&rq->stats + offset);
3033 src = (u64_stats_t *)((u8 *)&stats + offset);
3034 u64_stats_add(p: item, val: u64_stats_read(p: src));
3035 }
3036
3037 u64_stats_add(p: &rq->stats.packets, val: u64_stats_read(p: &stats.packets));
3038 u64_stats_add(p: &rq->stats.bytes, val: u64_stats_read(p: &stats.bytes));
3039
3040 u64_stats_update_end(syncp: &rq->stats.syncp);
3041
3042 return packets;
3043}
3044
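/* While polling rx, opportunistically reclaim completed tx buffers of the
 * paired send queue and wake its tx queue if enough descriptors were freed.
 * Only used when tx NAPI is enabled, and never for XDP tx queues.
 */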
3045static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
3046{
3047 struct virtnet_info *vi = rq->vq->vdev->priv;
3048 unsigned int index = vq2rxq(vq: rq->vq);
3049 struct send_queue *sq = &vi->sq[index];
3050 struct netdev_queue *txq = netdev_get_tx_queue(dev: vi->dev, index);
3051
3052 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, q: index))
3053 return;
3054
3055 if (__netif_tx_trylock(txq)) {
3056 if (sq->reset) {
3057 __netif_tx_unlock(txq);
3058 return;
3059 }
3060
3061 do {
3062 virtqueue_disable_cb(vq: sq->vq);
3063 free_old_xmit(sq, txq, in_napi: !!budget);
3064 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
3065
3066 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 &&
3067 netif_tx_queue_stopped(dev_queue: txq)) {
3068 u64_stats_update_begin(syncp: &sq->stats.syncp);
3069 u64_stats_inc(p: &sq->stats.wake);
3070 u64_stats_update_end(syncp: &sq->stats.syncp);
3071 netif_tx_wake_queue(dev_queue: txq);
3072 }
3073
3074 __netif_tx_unlock(txq);
3075 }
3076}
3077
3078static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq)
3079{
3080 struct dim_sample cur_sample = {};
3081
3082 if (!rq->packets_in_napi)
3083 return;
3084
	/* No protection is needed when fetching stats, since the fetcher and
	 * the updater of the stats run in the same context.
	 */
	dim_update_sample(rq->calls,
			  u64_stats_read(&rq->stats.packets),
			  u64_stats_read(&rq->stats.bytes),
			  &cur_sample);

	net_dim(&rq->dim, &cur_sample);
	rq->packets_in_napi = 0;
3095}
3096
3097static int virtnet_poll(struct napi_struct *napi, int budget)
3098{
3099 struct receive_queue *rq =
3100 container_of(napi, struct receive_queue, napi);
3101 struct virtnet_info *vi = rq->vq->vdev->priv;
3102 struct send_queue *sq;
3103 unsigned int received;
3104 unsigned int xdp_xmit = 0;
3105 bool napi_complete;
3106
3107 virtnet_poll_cleantx(rq, budget);
3108
3109 received = virtnet_receive(rq, budget, xdp_xmit: &xdp_xmit);
3110 rq->packets_in_napi += received;
3111
3112 if (xdp_xmit & VIRTIO_XDP_REDIR)
3113 xdp_do_flush();
3114
3115 /* Out of packets? */
3116 if (received < budget) {
3117 napi_complete = virtqueue_napi_complete(napi, vq: rq->vq, processed: received);
3118 /* Intentionally not taking dim_lock here. This may result in a
3119 * spurious net_dim call. But if that happens virtnet_rx_dim_work
3120 * will not act on the scheduled work.
3121 */
3122 if (napi_complete && rq->dim_enabled)
3123 virtnet_rx_dim_update(vi, rq);
3124 }
3125
3126 if (xdp_xmit & VIRTIO_XDP_TX) {
3127 sq = virtnet_xdp_get_sq(vi);
3128 if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq)) {
3129 u64_stats_update_begin(syncp: &sq->stats.syncp);
3130 u64_stats_inc(p: &sq->stats.kicks);
3131 u64_stats_update_end(syncp: &sq->stats.syncp);
3132 }
3133 virtnet_xdp_put_sq(vi, sq);
3134 }
3135
3136 return received;
3137}
3138
3139static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
3140{
	virtnet_napi_tx_disable(&vi->sq[qp_index]);
	virtnet_napi_disable(&vi->rq[qp_index]);
	xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
3144}
3145
3146static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
3147{
3148 struct net_device *dev = vi->dev;
3149 int err;
3150
3151 err = xdp_rxq_info_reg(xdp_rxq: &vi->rq[qp_index].xdp_rxq, dev, queue_index: qp_index,
3152 napi_id: vi->rq[qp_index].napi.napi_id);
3153 if (err < 0)
3154 return err;
3155
3156 err = xdp_rxq_info_reg_mem_model(xdp_rxq: &vi->rq[qp_index].xdp_rxq,
3157 type: MEM_TYPE_PAGE_SHARED, NULL);
3158 if (err < 0)
3159 goto err_xdp_reg_mem_model;
3160
3161 virtnet_napi_enable(rq: &vi->rq[qp_index]);
3162 virtnet_napi_tx_enable(sq: &vi->sq[qp_index]);
3163
3164 return 0;
3165
3166err_xdp_reg_mem_model:
3167 xdp_rxq_info_unreg(xdp_rxq: &vi->rq[qp_index].xdp_rxq);
3168 return err;
3169}
3170
3171static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim)
3172{
3173 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
3174 return;
3175 net_dim_work_cancel(dim);
3176}
3177
3178static void virtnet_update_settings(struct virtnet_info *vi)
3179{
3180 u32 speed;
3181 u8 duplex;
3182
3183 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
3184 return;
3185
3186 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
3187
3188 if (ethtool_validate_speed(speed))
3189 vi->speed = speed;
3190
3191 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
3192
3193 if (ethtool_validate_duplex(duplex))
3194 vi->duplex = duplex;
3195}
3196
3197static int virtnet_open(struct net_device *dev)
3198{
3199 struct virtnet_info *vi = netdev_priv(dev);
3200 int i, err;
3201
3202 enable_delayed_refill(vi);
3203
3204 for (i = 0; i < vi->max_queue_pairs; i++) {
3205 if (i < vi->curr_queue_pairs)
			/* Make sure we have some buffers: if OOM, use the refill workqueue. */
			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
				schedule_delayed_work(&vi->refill, 0);

		err = virtnet_enable_queue_pair(vi, i);
3211 if (err < 0)
3212 goto err_enable_qp;
3213 }
3214
3215 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_STATUS)) {
3216 if (vi->status & VIRTIO_NET_S_LINK_UP)
3217 netif_carrier_on(dev: vi->dev);
3218 virtio_config_driver_enable(dev: vi->vdev);
3219 } else {
3220 vi->status = VIRTIO_NET_S_LINK_UP;
3221 netif_carrier_on(dev);
3222 }
3223
3224 return 0;
3225
3226err_enable_qp:
3227 disable_delayed_refill(vi);
3228 cancel_delayed_work_sync(dwork: &vi->refill);
3229
3230 for (i--; i >= 0; i--) {
3231 virtnet_disable_queue_pair(vi, qp_index: i);
3232 virtnet_cancel_dim(vi, dim: &vi->rq[i].dim);
3233 }
3234
3235 return err;
3236}
3237
3238static int virtnet_poll_tx(struct napi_struct *napi, int budget)
3239{
3240 struct send_queue *sq = container_of(napi, struct send_queue, napi);
3241 struct virtnet_info *vi = sq->vq->vdev->priv;
3242 unsigned int index = vq2txq(vq: sq->vq);
3243 struct netdev_queue *txq;
3244 int opaque, xsk_done = 0;
3245 bool done;
3246
3247 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
3248 /* We don't need to enable cb for XDP */
3249 napi_complete_done(n: napi, work_done: 0);
3250 return 0;
3251 }
3252
3253 txq = netdev_get_tx_queue(dev: vi->dev, index);
3254 __netif_tx_lock(txq, raw_smp_processor_id());
3255 virtqueue_disable_cb(vq: sq->vq);
3256
3257 if (sq->xsk_pool)
3258 xsk_done = virtnet_xsk_xmit(sq, pool: sq->xsk_pool, budget);
3259 else
3260 free_old_xmit(sq, txq, in_napi: !!budget);
3261
3262 if (sq->vq->num_free >= MAX_SKB_FRAGS + 2 &&
3263 netif_tx_queue_stopped(dev_queue: txq)) {
3264 u64_stats_update_begin(syncp: &sq->stats.syncp);
3265 u64_stats_inc(p: &sq->stats.wake);
3266 u64_stats_update_end(syncp: &sq->stats.syncp);
3267 netif_tx_wake_queue(dev_queue: txq);
3268 }
3269
3270 if (xsk_done >= budget) {
3271 __netif_tx_unlock(txq);
3272 return budget;
3273 }
3274
3275 opaque = virtqueue_enable_cb_prepare(vq: sq->vq);
3276
3277 done = napi_complete_done(n: napi, work_done: 0);
3278
3279 if (!done)
3280 virtqueue_disable_cb(vq: sq->vq);
3281
3282 __netif_tx_unlock(txq);
3283
3284 if (done) {
3285 if (unlikely(virtqueue_poll(sq->vq, opaque))) {
3286 if (napi_schedule_prep(n: napi)) {
3287 __netif_tx_lock(txq, raw_smp_processor_id());
3288 virtqueue_disable_cb(vq: sq->vq);
3289 __netif_tx_unlock(txq);
3290 __napi_schedule(n: napi);
3291 }
3292 }
3293 }
3294
3295 return 0;
3296}
3297
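/* Build the scatterlist for an skb and queue it on the send virtqueue.
 * When the device accepts any header layout (any_header_sg) and the skb has
 * suitable headroom and alignment, the virtio-net header is pushed into the
 * skb data; otherwise the out-of-band header area is used as a separate sg
 * entry.
 */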
3298static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
3299{
3300 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
3301 struct virtnet_info *vi = sq->vq->vdev->priv;
3302 struct virtio_net_hdr_v1_hash_tunnel *hdr;
3303 int num_sg;
3304 unsigned hdr_len = vi->hdr_len;
3305 bool can_push;
3306
3307 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
3308
3309 can_push = vi->any_header_sg &&
3310 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
3311 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
3312 /* Even if we can, don't push here yet as this would skew
3313 * csum_start offset below. */
3314 if (can_push)
3315 hdr = (struct virtio_net_hdr_v1_hash_tunnel *)(skb->data -
3316 hdr_len);
3317 else
3318 hdr = &skb_vnet_common_hdr(skb)->tnl_hdr;
3319
3320 if (virtio_net_hdr_tnl_from_skb(skb, vhdr: hdr, tnl_hdr_negotiated: vi->tx_tnl,
3321 little_endian: virtio_is_little_endian(vdev: vi->vdev), vlan_hlen: 0))
3322 return -EPROTO;
3323
3324 if (vi->mergeable_rx_bufs)
3325 hdr->hash_hdr.hdr.num_buffers = 0;
3326
3327 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
3328 if (can_push) {
3329 __skb_push(skb, len: hdr_len);
3330 num_sg = skb_to_sgvec(skb, sg: sq->sg, offset: 0, len: skb->len);
3331 if (unlikely(num_sg < 0))
3332 return num_sg;
3333 /* Pull header back to avoid skew in tx bytes calculations. */
3334 __skb_pull(skb, len: hdr_len);
3335 } else {
3336 sg_set_buf(sg: sq->sg, buf: hdr, buflen: hdr_len);
3337 num_sg = skb_to_sgvec(skb, sg: sq->sg + 1, offset: 0, len: skb->len);
3338 if (unlikely(num_sg < 0))
3339 return num_sg;
3340 num_sg++;
3341 }
3342
3343 return virtnet_add_outbuf(sq, num: num_sg, data: skb,
3344 type: orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB);
3345}
3346
3347static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
3348{
3349 struct virtnet_info *vi = netdev_priv(dev);
3350 int qnum = skb_get_queue_mapping(skb);
3351 struct send_queue *sq = &vi->sq[qnum];
3352 int err;
3353 struct netdev_queue *txq = netdev_get_tx_queue(dev, index: qnum);
3354 bool xmit_more = netdev_xmit_more();
3355 bool use_napi = sq->napi.weight;
3356 bool kick;
3357
3358 if (!use_napi)
3359 free_old_xmit(sq, txq, in_napi: false);
3360 else
3361 virtqueue_disable_cb(vq: sq->vq);
3362
3363 /* timestamp packet in software */
3364 skb_tx_timestamp(skb);
3365
3366 /* Try to transmit */
3367 err = xmit_skb(sq, skb, orphan: !use_napi);
3368
3369 /* This should not happen! */
3370 if (unlikely(err)) {
3371 DEV_STATS_INC(dev, tx_fifo_errors);
3372 if (net_ratelimit())
3373 dev_warn(&dev->dev,
3374 "Unexpected TXQ (%d) queue failure: %d\n",
3375 qnum, err);
3376 DEV_STATS_INC(dev, tx_dropped);
3377 dev_kfree_skb_any(skb);
3378 return NETDEV_TX_OK;
3379 }
3380
3381 /* Don't wait up for transmitted skbs to be freed. */
3382 if (!use_napi) {
3383 skb_orphan(skb);
3384 nf_reset_ct(skb);
3385 }
3386
3387 if (use_napi)
3388 tx_may_stop(vi, dev, sq);
3389 else
		check_sq_full_and_disable(vi, dev, sq);
3391
3392 kick = use_napi ? __netdev_tx_sent_queue(dev_queue: txq, bytes: skb->len, xmit_more) :
3393 !xmit_more || netif_xmit_stopped(dev_queue: txq);
3394 if (kick) {
3395 if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq)) {
3396 u64_stats_update_begin(syncp: &sq->stats.syncp);
3397 u64_stats_inc(p: &sq->stats.kicks);
3398 u64_stats_update_end(syncp: &sq->stats.syncp);
3399 }
3400 }
3401
3402 if (use_napi && kick && unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
3403 virtqueue_napi_schedule(napi: &sq->napi, vq: sq->vq);
3404
3405 return NETDEV_TX_OK;
3406}
3407
3408static void __virtnet_rx_pause(struct virtnet_info *vi,
3409 struct receive_queue *rq)
3410{
3411 bool running = netif_running(dev: vi->dev);
3412
3413 if (running) {
3414 virtnet_napi_disable(rq);
3415 virtnet_cancel_dim(vi, dim: &rq->dim);
3416 }
3417}
3418
3419static void virtnet_rx_pause_all(struct virtnet_info *vi)
3420{
3421 int i;
3422
3423 /*
3424 * Make sure refill_work does not run concurrently to
3425 * avoid napi_disable race which leads to deadlock.
3426 */
3427 disable_delayed_refill(vi);
3428 cancel_delayed_work_sync(dwork: &vi->refill);
3429 for (i = 0; i < vi->max_queue_pairs; i++)
3430 __virtnet_rx_pause(vi, rq: &vi->rq[i]);
3431}
3432
3433static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
3434{
3435 /*
3436 * Make sure refill_work does not run concurrently to
3437 * avoid napi_disable race which leads to deadlock.
3438 */
3439 disable_delayed_refill(vi);
3440 cancel_delayed_work_sync(dwork: &vi->refill);
3441 __virtnet_rx_pause(vi, rq);
3442}
3443
3444static void __virtnet_rx_resume(struct virtnet_info *vi,
3445 struct receive_queue *rq,
3446 bool refill)
3447{
3448 bool running = netif_running(dev: vi->dev);
3449 bool schedule_refill = false;
3450
3451 if (refill && !try_fill_recv(vi, rq, GFP_KERNEL))
3452 schedule_refill = true;
3453 if (running)
3454 virtnet_napi_enable(rq);
3455
3456 if (schedule_refill)
3457 schedule_delayed_work(dwork: &vi->refill, delay: 0);
3458}
3459
3460static void virtnet_rx_resume_all(struct virtnet_info *vi)
3461{
3462 int i;
3463
3464 enable_delayed_refill(vi);
3465 for (i = 0; i < vi->max_queue_pairs; i++) {
3466 if (i < vi->curr_queue_pairs)
3467 __virtnet_rx_resume(vi, rq: &vi->rq[i], refill: true);
3468 else
3469 __virtnet_rx_resume(vi, rq: &vi->rq[i], refill: false);
3470 }
3471}
3472
3473static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
3474{
3475 enable_delayed_refill(vi);
3476 __virtnet_rx_resume(vi, rq, refill: true);
3477}
3478
3479static int virtnet_rx_resize(struct virtnet_info *vi,
3480 struct receive_queue *rq, u32 ring_num)
3481{
3482 int err, qindex;
3483
3484 qindex = rq - vi->rq;
3485
3486 virtnet_rx_pause(vi, rq);
3487
3488 err = virtqueue_resize(vq: rq->vq, num: ring_num, recycle: virtnet_rq_unmap_free_buf, NULL);
3489 if (err)
3490 netdev_err(dev: vi->dev, format: "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
3491
3492 virtnet_rx_resume(vi, rq);
3493 return err;
3494}
3495
3496static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
3497{
3498 bool running = netif_running(dev: vi->dev);
3499 struct netdev_queue *txq;
3500 int qindex;
3501
3502 qindex = sq - vi->sq;
3503
3504 if (running)
3505 virtnet_napi_tx_disable(sq);
3506
3507 txq = netdev_get_tx_queue(dev: vi->dev, index: qindex);
3508
	/* 1. wait for all in-flight xmit to complete
	 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
	 */
	__netif_tx_lock_bh(txq);

	/* Prevent rx poll from accessing sq. */
	sq->reset = true;

	/* Prevent the upper layer from trying to send packets. */
	netif_stop_subqueue(vi->dev, qindex);
3519
3520 __netif_tx_unlock_bh(txq);
3521}
3522
3523static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
3524{
3525 bool running = netif_running(dev: vi->dev);
3526 struct netdev_queue *txq;
3527 int qindex;
3528
3529 qindex = sq - vi->sq;
3530
3531 txq = netdev_get_tx_queue(dev: vi->dev, index: qindex);
3532
3533 __netif_tx_lock_bh(txq);
3534 sq->reset = false;
3535 netif_tx_wake_queue(dev_queue: txq);
3536 __netif_tx_unlock_bh(txq);
3537
3538 if (running)
3539 virtnet_napi_tx_enable(sq);
3540}
3541
3542static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
3543 u32 ring_num)
3544{
3545 int qindex, err;
3546
	if (ring_num <= MAX_SKB_FRAGS + 2) {
		netdev_err(vi->dev, "tx size (%d) must be larger than %d\n",
			   ring_num, MAX_SKB_FRAGS + 2);
3550 return -EINVAL;
3551 }
3552
3553 qindex = sq - vi->sq;
3554
3555 virtnet_tx_pause(vi, sq);
3556
3557 err = virtqueue_resize(vq: sq->vq, num: ring_num, recycle: virtnet_sq_free_unused_buf,
3558 recycle_done: virtnet_sq_free_unused_buf_done);
3559 if (err)
3560 netdev_err(dev: vi->dev, format: "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
3561
3562 virtnet_tx_resume(vi, sq);
3563
3564 return err;
3565}
3566
3567/*
3568 * Send command via the control virtqueue and check status. Commands
3569 * supported by the hypervisor, as indicated by feature bits, should
3570 * never fail unless improperly formatted.
3571 */
3572static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd,
3573 struct scatterlist *out,
3574 struct scatterlist *in)
3575{
3576 struct scatterlist *sgs[5], hdr, stat;
3577 u32 out_num = 0, tmp, in_num = 0;
3578 bool ok;
3579 int ret;
3580
3581 /* Caller should know better */
3582 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
3583
3584 mutex_lock(lock: &vi->cvq_lock);
3585 vi->ctrl->status = ~0;
3586 vi->ctrl->hdr.class = class;
3587 vi->ctrl->hdr.cmd = cmd;
3588 /* Add header */
3589 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
3590 sgs[out_num++] = &hdr;
3591
3592 if (out)
3593 sgs[out_num++] = out;
3594
3595 /* Add return status. */
3596 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
3597 sgs[out_num + in_num++] = &stat;
3598
3599 if (in)
3600 sgs[out_num + in_num++] = in;
3601
3602 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
3603 ret = virtqueue_add_sgs(vq: vi->cvq, sgs, out_sgs: out_num, in_sgs: in_num, data: vi, GFP_ATOMIC);
3604 if (ret < 0) {
3605 dev_warn(&vi->vdev->dev,
			 "Failed to add sgs for command vq: %d\n", ret);
		mutex_unlock(&vi->cvq_lock);
3608 return false;
3609 }
3610
3611 if (unlikely(!virtqueue_kick(vi->cvq)))
3612 goto unlock;
3613
3614 /* Spin for a response, the kick causes an ioport write, trapping
3615 * into the hypervisor, so the request should be handled immediately.
3616 */
3617 while (!virtqueue_get_buf(vq: vi->cvq, len: &tmp) &&
3618 !virtqueue_is_broken(vq: vi->cvq)) {
3619 cond_resched();
3620 cpu_relax();
3621 }
3622
3623unlock:
3624 ok = vi->ctrl->status == VIRTIO_NET_OK;
3625 mutex_unlock(lock: &vi->cvq_lock);
3626 return ok;
3627}
3628
3629static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
3630 struct scatterlist *out)
3631{
3632 return virtnet_send_command_reply(vi, class, cmd, out, NULL);
3633}
3634
3635static int virtnet_set_mac_address(struct net_device *dev, void *p)
3636{
3637 struct virtnet_info *vi = netdev_priv(dev);
3638 struct virtio_device *vdev = vi->vdev;
3639 int ret;
3640 struct sockaddr *addr;
3641 struct scatterlist sg;
3642
3643 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_STANDBY))
3644 return -EOPNOTSUPP;
3645
3646 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
3647 if (!addr)
3648 return -ENOMEM;
3649
3650 ret = eth_prepare_mac_addr_change(dev, p: addr);
3651 if (ret)
3652 goto out;
3653
3654 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
3655 sg_init_one(&sg, addr->sa_data, dev->addr_len);
3656 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
3657 VIRTIO_NET_CTRL_MAC_ADDR_SET, out: &sg)) {
3658 dev_warn(&vdev->dev,
3659 "Failed to set mac address by vq command.\n");
3660 ret = -EINVAL;
3661 goto out;
3662 }
3663 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
3664 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3665 unsigned int i;
3666
3667 /* Naturally, this has an atomicity problem. */
3668 for (i = 0; i < dev->addr_len; i++)
3669 virtio_cwrite8(vdev,
3670 offsetof(struct virtio_net_config, mac) +
3671 i, val: addr->sa_data[i]);
3672 }
3673
3674 eth_commit_mac_addr_change(dev, p);
3675 ret = 0;
3676
3677out:
3678 kfree(objp: addr);
3679 return ret;
3680}
3681
3682static void virtnet_stats(struct net_device *dev,
3683 struct rtnl_link_stats64 *tot)
3684{
3685 struct virtnet_info *vi = netdev_priv(dev);
3686 unsigned int start;
3687 int i;
3688
3689 for (i = 0; i < vi->max_queue_pairs; i++) {
3690 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
3691 struct receive_queue *rq = &vi->rq[i];
3692 struct send_queue *sq = &vi->sq[i];
3693
3694 do {
3695 start = u64_stats_fetch_begin(syncp: &sq->stats.syncp);
3696 tpackets = u64_stats_read(p: &sq->stats.packets);
3697 tbytes = u64_stats_read(p: &sq->stats.bytes);
3698 terrors = u64_stats_read(p: &sq->stats.tx_timeouts);
3699 } while (u64_stats_fetch_retry(syncp: &sq->stats.syncp, start));
3700
3701 do {
3702 start = u64_stats_fetch_begin(syncp: &rq->stats.syncp);
3703 rpackets = u64_stats_read(p: &rq->stats.packets);
3704 rbytes = u64_stats_read(p: &rq->stats.bytes);
3705 rdrops = u64_stats_read(p: &rq->stats.drops);
3706 } while (u64_stats_fetch_retry(syncp: &rq->stats.syncp, start));
3707
3708 tot->rx_packets += rpackets;
3709 tot->tx_packets += tpackets;
3710 tot->rx_bytes += rbytes;
3711 tot->tx_bytes += tbytes;
3712 tot->rx_dropped += rdrops;
3713 tot->tx_errors += terrors;
3714 }
3715
3716 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
3717 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors);
3718 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors);
3719 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors);
3720}
3721
3722static void virtnet_ack_link_announce(struct virtnet_info *vi)
3723{
3724 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
3725 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
3726 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
3727}
3728
3729static bool virtnet_commit_rss_command(struct virtnet_info *vi);
3730
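/* Rebuild the RSS indirection table with the default round-robin spread over
 * the new number of queue pairs and record that count in max_tx_vq.
 */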
3731static void virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs)
3732{
3733 u32 indir_val = 0;
3734 int i = 0;
3735
3736 for (; i < vi->rss_indir_table_size; ++i) {
3737 indir_val = ethtool_rxfh_indir_default(index: i, n_rx_rings: queue_pairs);
3738 vi->rss_hdr->indirection_table[i] = cpu_to_le16(indir_val);
3739 }
3740 vi->rss_trailer.max_tx_vq = cpu_to_le16(queue_pairs);
3741}
3742
3743static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
3744{
3745 struct virtio_net_ctrl_mq *mq __free(kfree) = NULL;
3746 struct virtio_net_rss_config_hdr *old_rss_hdr;
3747 struct virtio_net_rss_config_trailer old_rss_trailer;
3748 struct net_device *dev = vi->dev;
3749 struct scatterlist sg;
3750
3751 if (!vi->has_cvq || !virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_MQ))
3752 return 0;
3753
	/* First check whether we need to update RSS. Do the update only if
	 * (1) RSS is enabled and (2) there is no user configuration.
	 *
	 * During RSS command processing, the device updates queue_pairs using
	 * rss.max_tx_vq. That is, the device updates queue_pairs together with
	 * RSS, so we can skip the separate queue_pairs update
	 * (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly.
	 */
3761 if (vi->has_rss && !netif_is_rxfh_configured(dev)) {
3762 old_rss_hdr = vi->rss_hdr;
3763 old_rss_trailer = vi->rss_trailer;
3764 vi->rss_hdr = devm_kzalloc(dev: &dev->dev, size: virtnet_rss_hdr_size(vi), GFP_KERNEL);
3765 if (!vi->rss_hdr) {
3766 vi->rss_hdr = old_rss_hdr;
3767 return -ENOMEM;
3768 }
3769
3770 *vi->rss_hdr = *old_rss_hdr;
3771 virtnet_rss_update_by_qpairs(vi, queue_pairs);
3772
3773 if (!virtnet_commit_rss_command(vi)) {
3774 /* restore ctrl_rss if commit_rss_command failed */
3775 devm_kfree(dev: &dev->dev, p: vi->rss_hdr);
3776 vi->rss_hdr = old_rss_hdr;
3777 vi->rss_trailer = old_rss_trailer;
3778
3779 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n",
3780 queue_pairs);
3781 return -EINVAL;
3782 }
3783 devm_kfree(dev: &dev->dev, p: old_rss_hdr);
3784 goto succ;
3785 }
3786
3787 mq = kzalloc(sizeof(*mq), GFP_KERNEL);
3788 if (!mq)
3789 return -ENOMEM;
3790
3791 mq->virtqueue_pairs = cpu_to_virtio16(vdev: vi->vdev, val: queue_pairs);
3792 sg_init_one(&sg, mq, sizeof(*mq));
3793
3794 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
3795 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, out: &sg)) {
3796 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
3797 queue_pairs);
3798 return -EINVAL;
3799 }
3800succ:
3801 vi->curr_queue_pairs = queue_pairs;
	/* virtnet_open() will refill when the device is brought up. */
	spin_lock_bh(&vi->refill_lock);
	if (dev->flags & IFF_UP && vi->refill_enabled)
		schedule_delayed_work(&vi->refill, 0);
	spin_unlock_bh(&vi->refill_lock);
3807
3808 return 0;
3809}
3810
3811static int virtnet_close(struct net_device *dev)
3812{
3813 struct virtnet_info *vi = netdev_priv(dev);
3814 int i;
3815
3816 /* Make sure NAPI doesn't schedule refill work */
3817 disable_delayed_refill(vi);
3818 /* Make sure refill_work doesn't re-enable napi! */
3819 cancel_delayed_work_sync(dwork: &vi->refill);
3820 /* Prevent the config change callback from changing carrier
3821 * after close
3822 */
3823 virtio_config_driver_disable(dev: vi->vdev);
3824 /* Stop getting status/speed updates: we don't care until next
3825 * open
3826 */
3827 cancel_work_sync(work: &vi->config_work);
3828
3829 for (i = 0; i < vi->max_queue_pairs; i++) {
3830 virtnet_disable_queue_pair(vi, qp_index: i);
3831 virtnet_cancel_dim(vi, dim: &vi->rq[i].dim);
3832 }
3833
3834 netif_carrier_off(dev);
3835
3836 return 0;
3837}
3838
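/* Deferred ndo_set_rx_mode handler: pushes the promisc/allmulti flags and the
 * unicast/multicast MAC filter lists to the device over the ctrl vq.
 */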
3839static void virtnet_rx_mode_work(struct work_struct *work)
3840{
3841 struct virtnet_info *vi =
3842 container_of(work, struct virtnet_info, rx_mode_work);
3843 u8 *promisc_allmulti __free(kfree) = NULL;
3844 struct net_device *dev = vi->dev;
3845 struct scatterlist sg[2];
3846 struct virtio_net_ctrl_mac *mac_data;
3847 struct netdev_hw_addr *ha;
3848 int uc_count;
3849 int mc_count;
3850 void *buf;
3851 int i;
3852
3853 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
3854 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_CTRL_RX))
3855 return;
3856
3857 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL);
3858 if (!promisc_allmulti) {
3859 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n");
3860 return;
3861 }
3862
3863 rtnl_lock();
3864
3865 *promisc_allmulti = !!(dev->flags & IFF_PROMISC);
3866 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));
3867
3868 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
3869 VIRTIO_NET_CTRL_RX_PROMISC, out: sg))
3870 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
3871 *promisc_allmulti ? "en" : "dis");
3872
3873 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI);
3874 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));
3875
3876 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
3877 VIRTIO_NET_CTRL_RX_ALLMULTI, out: sg))
3878 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
3879 *promisc_allmulti ? "en" : "dis");
3880
3881 netif_addr_lock_bh(dev);
3882
3883 uc_count = netdev_uc_count(dev);
3884 mc_count = netdev_mc_count(dev);
3885 /* MAC filter - use one buffer for both lists */
3886 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
3887 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
3888 mac_data = buf;
3889 if (!buf) {
3890 netif_addr_unlock_bh(dev);
3891 rtnl_unlock();
3892 return;
3893 }
3894
3895 sg_init_table(sg, 2);
3896
3897 /* Store the unicast list and count in the front of the buffer */
3898 mac_data->entries = cpu_to_virtio32(vdev: vi->vdev, val: uc_count);
3899 i = 0;
3900 netdev_for_each_uc_addr(ha, dev)
3901 memcpy(to: &mac_data->macs[i++][0], from: ha->addr, ETH_ALEN);
3902
3903 sg_set_buf(sg: &sg[0], buf: mac_data,
3904 buflen: sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
3905
3906 /* multicast list and count fill the end */
3907 mac_data = (void *)&mac_data->macs[uc_count][0];
3908
3909 mac_data->entries = cpu_to_virtio32(vdev: vi->vdev, val: mc_count);
3910 i = 0;
3911 netdev_for_each_mc_addr(ha, dev)
3912 memcpy(to: &mac_data->macs[i++][0], from: ha->addr, ETH_ALEN);
3913
3914 netif_addr_unlock_bh(dev);
3915
3916 sg_set_buf(sg: &sg[1], buf: mac_data,
3917 buflen: sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
3918
3919 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
3920 VIRTIO_NET_CTRL_MAC_TABLE_SET, out: sg))
3921 dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
3922
3923 rtnl_unlock();
3924
3925 kfree(objp: buf);
3926}
3927
3928static void virtnet_set_rx_mode(struct net_device *dev)
3929{
3930 struct virtnet_info *vi = netdev_priv(dev);
3931
3932 if (vi->rx_mode_work_enabled)
3933 schedule_work(work: &vi->rx_mode_work);
3934}
3935
3936static int virtnet_vlan_rx_add_vid(struct net_device *dev,
3937 __be16 proto, u16 vid)
3938{
3939 struct virtnet_info *vi = netdev_priv(dev);
3940 __virtio16 *_vid __free(kfree) = NULL;
3941 struct scatterlist sg;
3942
3943 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL);
3944 if (!_vid)
3945 return -ENOMEM;
3946
3947 *_vid = cpu_to_virtio16(vdev: vi->vdev, val: vid);
3948 sg_init_one(&sg, _vid, sizeof(*_vid));
3949
3950 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
3951 VIRTIO_NET_CTRL_VLAN_ADD, out: &sg))
3952 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
3953 return 0;
3954}
3955
3956static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
3957 __be16 proto, u16 vid)
3958{
3959 struct virtnet_info *vi = netdev_priv(dev);
3960 __virtio16 *_vid __free(kfree) = NULL;
3961 struct scatterlist sg;
3962
3963 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL);
3964 if (!_vid)
3965 return -ENOMEM;
3966
3967 *_vid = cpu_to_virtio16(vdev: vi->vdev, val: vid);
3968 sg_init_one(&sg, _vid, sizeof(*_vid));
3969
3970 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
3971 VIRTIO_NET_CTRL_VLAN_DEL, out: &sg))
3972 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
3973 return 0;
3974}
3975
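/* Drop any previously set virtqueue CPU affinity hints. */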
3976static void virtnet_clean_affinity(struct virtnet_info *vi)
3977{
3978 int i;
3979
3980 if (vi->affinity_hint_set) {
3981 for (i = 0; i < vi->max_queue_pairs; i++) {
3982 virtqueue_set_affinity(vq: vi->rq[i].vq, NULL);
3983 virtqueue_set_affinity(vq: vi->sq[i].vq, NULL);
3984 }
3985
3986 vi->affinity_hint_set = false;
3987 }
3988}
3989
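/* Spread the online CPUs across the active queue pairs: each pair gets a
 * contiguous group of roughly num_cpu / curr_queue_pairs CPUs (the remainder
 * is handed out one extra CPU per leading pair), and the same mask is used
 * for both the virtqueue affinity hints and the XPS mapping.
 */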
3990static void virtnet_set_affinity(struct virtnet_info *vi)
3991{
3992 cpumask_var_t mask;
3993 int stragglers;
3994 int group_size;
3995 int i, start = 0, cpu;
3996 int num_cpu;
3997 int stride;
3998
3999 if (!zalloc_cpumask_var(mask: &mask, GFP_KERNEL)) {
4000 virtnet_clean_affinity(vi);
4001 return;
4002 }
4003
4004 num_cpu = num_online_cpus();
4005 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
4006 stragglers = num_cpu >= vi->curr_queue_pairs ?
4007 num_cpu % vi->curr_queue_pairs :
4008 0;
4009
4010 for (i = 0; i < vi->curr_queue_pairs; i++) {
4011 group_size = stride + (i < stragglers ? 1 : 0);
4012
4013 for_each_online_cpu_wrap(cpu, start) {
4014 if (!group_size--) {
4015 start = cpu;
4016 break;
4017 }
4018 cpumask_set_cpu(cpu, dstp: mask);
4019 }
4020
4021 virtqueue_set_affinity(vq: vi->rq[i].vq, cpu_mask: mask);
4022 virtqueue_set_affinity(vq: vi->sq[i].vq, cpu_mask: mask);
4023 __netif_set_xps_queue(dev: vi->dev, cpumask_bits(mask), index: i, type: XPS_CPUS);
4024 cpumask_clear(dstp: mask);
4025 }
4026
4027 vi->affinity_hint_set = true;
4028 free_cpumask_var(mask);
4029}
4030
4031static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
4032{
4033 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
4034 node);
4035 virtnet_set_affinity(vi);
4036 return 0;
4037}
4038
4039static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
4040{
4041 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
4042 node_dead);
4043 virtnet_set_affinity(vi);
4044 return 0;
4045}
4046
4047static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
4048{
4049 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
4050 node);
4051
4052 virtnet_clean_affinity(vi);
4053 return 0;
4054}
4055
4056static enum cpuhp_state virtionet_online;
4057
4058static int virtnet_cpu_notif_add(struct virtnet_info *vi)
4059{
4060 int ret;
4061
4062 ret = cpuhp_state_add_instance_nocalls(state: virtionet_online, node: &vi->node);
4063 if (ret)
4064 return ret;
4065 ret = cpuhp_state_add_instance_nocalls(state: CPUHP_VIRT_NET_DEAD,
4066 node: &vi->node_dead);
4067 if (!ret)
4068 return ret;
4069 cpuhp_state_remove_instance_nocalls(state: virtionet_online, node: &vi->node);
4070 return ret;
4071}
4072
4073static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
4074{
4075 cpuhp_state_remove_instance_nocalls(state: virtionet_online, node: &vi->node);
4076 cpuhp_state_remove_instance_nocalls(state: CPUHP_VIRT_NET_DEAD,
4077 node: &vi->node_dead);
4078}
4079
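/* Send VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET to configure the interrupt coalescing
 * parameters of a single virtqueue.
 */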
4080static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi,
4081 u16 vqn, u32 max_usecs, u32 max_packets)
4082{
4083 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL;
4084 struct scatterlist sgs;
4085
4086 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL);
4087 if (!coal_vq)
4088 return -ENOMEM;
4089
4090 coal_vq->vqn = cpu_to_le16(vqn);
4091 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs);
4092 coal_vq->coal.max_packets = cpu_to_le32(max_packets);
4093 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq));
4094
4095 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
4096 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
4097 out: &sgs))
4098 return -EINVAL;
4099
4100 return 0;
4101}
4102
4103static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
4104 u16 queue, u32 max_usecs,
4105 u32 max_packets)
4106{
4107 int err;
4108
4109 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
4110 return -EOPNOTSUPP;
4111
4112 err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: rxq2vq(rxq: queue),
4113 max_usecs, max_packets);
4114 if (err)
4115 return err;
4116
4117 vi->rq[queue].intr_coal.max_usecs = max_usecs;
4118 vi->rq[queue].intr_coal.max_packets = max_packets;
4119
4120 return 0;
4121}
4122
4123static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
4124 u16 queue, u32 max_usecs,
4125 u32 max_packets)
4126{
4127 int err;
4128
4129 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
4130 return -EOPNOTSUPP;
4131
4132 err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: txq2vq(txq: queue),
4133 max_usecs, max_packets);
4134 if (err)
4135 return err;
4136
4137 vi->sq[queue].intr_coal.max_usecs = max_usecs;
4138 vi->sq[queue].intr_coal.max_packets = max_packets;
4139
4140 return 0;
4141}
4142
4143static void virtnet_get_ringparam(struct net_device *dev,
4144 struct ethtool_ringparam *ring,
4145 struct kernel_ethtool_ringparam *kernel_ring,
4146 struct netlink_ext_ack *extack)
4147{
4148 struct virtnet_info *vi = netdev_priv(dev);
4149
4150 ring->rx_max_pending = vi->rq[0].vq->num_max;
4151 ring->tx_max_pending = vi->sq[0].vq->num_max;
4152 ring->rx_pending = virtqueue_get_vring_size(vq: vi->rq[0].vq);
4153 ring->tx_pending = virtqueue_get_vring_size(vq: vi->sq[0].vq);
4154}
4155
4156static int virtnet_set_ringparam(struct net_device *dev,
4157 struct ethtool_ringparam *ring,
4158 struct kernel_ethtool_ringparam *kernel_ring,
4159 struct netlink_ext_ack *extack)
4160{
4161 struct virtnet_info *vi = netdev_priv(dev);
4162 u32 rx_pending, tx_pending;
4163 struct receive_queue *rq;
4164 struct send_queue *sq;
4165 int i, err;
4166
4167 if (ring->rx_mini_pending || ring->rx_jumbo_pending)
4168 return -EINVAL;
4169
4170 rx_pending = virtqueue_get_vring_size(vq: vi->rq[0].vq);
4171 tx_pending = virtqueue_get_vring_size(vq: vi->sq[0].vq);
4172
4173 if (ring->rx_pending == rx_pending &&
4174 ring->tx_pending == tx_pending)
4175 return 0;
4176
4177 if (ring->rx_pending > vi->rq[0].vq->num_max)
4178 return -EINVAL;
4179
4180 if (ring->tx_pending > vi->sq[0].vq->num_max)
4181 return -EINVAL;
4182
4183 for (i = 0; i < vi->max_queue_pairs; i++) {
4184 rq = vi->rq + i;
4185 sq = vi->sq + i;
4186
4187 if (ring->tx_pending != tx_pending) {
4188 err = virtnet_tx_resize(vi, sq, ring_num: ring->tx_pending);
4189 if (err)
4190 return err;
4191
4192 /* Upon disabling and re-enabling a transmit virtqueue, the device must
4193 * set the coalescing parameters of the virtqueue to those configured
4194 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver
4195 * did not set any TX coalescing parameters, to 0.
4196 */
4197 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue: i,
4198 max_usecs: vi->intr_coal_tx.max_usecs,
4199 max_packets: vi->intr_coal_tx.max_packets);
4200
4201 /* Don't break the tx resize action if the vq coalescing is not
4202 * supported. The same is true for rx resize below.
4203 */
4204 if (err && err != -EOPNOTSUPP)
4205 return err;
4206 }
4207
4208 if (ring->rx_pending != rx_pending) {
4209 err = virtnet_rx_resize(vi, rq, ring_num: ring->rx_pending);
4210 if (err)
4211 return err;
4212
4213 /* The reason is the same as for the transmit virtqueue reset above */
4214 mutex_lock(lock: &vi->rq[i].dim_lock);
4215 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue: i,
4216 max_usecs: vi->intr_coal_rx.max_usecs,
4217 max_packets: vi->intr_coal_rx.max_packets);
4218 mutex_unlock(lock: &vi->rq[i].dim_lock);
4219 if (err && err != -EOPNOTSUPP)
4220 return err;
4221 }
4222 }
4223
4224 return 0;
4225}
4226
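/* Push the current RSS/hash configuration (header and trailer, i.e. hash
 * types, indirection table and key) to the device over the ctrl vq.
 */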
4227static bool virtnet_commit_rss_command(struct virtnet_info *vi)
4228{
4229 struct net_device *dev = vi->dev;
4230 struct scatterlist sgs[2];
4231
4232 /* prepare sgs */
4233 sg_init_table(sgs, 2);
4234 sg_set_buf(sg: &sgs[0], buf: vi->rss_hdr, buflen: virtnet_rss_hdr_size(vi));
4235 sg_set_buf(sg: &sgs[1], buf: &vi->rss_trailer, buflen: virtnet_rss_trailer_size(vi));
4236
4237 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
4238 cmd: vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
4239 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, out: sgs))
4240 goto err;
4241
4242 return true;
4243
4244err:
4245 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
4246 return false;
4247
4248}
4249
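/* Initialize the RSS state to its defaults: enable all supported hash types,
 * spread the indirection table across the current queue pairs and generate a
 * random hash key.
 */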
4250static void virtnet_init_default_rss(struct virtnet_info *vi)
4251{
4252 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_supported);
4253 vi->rss_hash_types_saved = vi->rss_hash_types_supported;
4254 vi->rss_hdr->indirection_table_mask = vi->rss_indir_table_size
4255 ? cpu_to_le16(vi->rss_indir_table_size - 1) : 0;
4256 vi->rss_hdr->unclassified_queue = 0;
4257
4258 virtnet_rss_update_by_qpairs(vi, queue_pairs: vi->curr_queue_pairs);
4259
4260 vi->rss_trailer.hash_key_length = vi->rss_key_size;
4261
4262 netdev_rss_key_fill(buffer: vi->rss_hash_key_data, len: vi->rss_key_size);
4263}
4264
4265static int virtnet_get_hashflow(struct net_device *dev,
4266 struct ethtool_rxfh_fields *info)
4267{
4268 struct virtnet_info *vi = netdev_priv(dev);
4269
4270 info->data = 0;
4271 switch (info->flow_type) {
4272 case TCP_V4_FLOW:
4273 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
4274 info->data = RXH_IP_SRC | RXH_IP_DST |
4275 RXH_L4_B_0_1 | RXH_L4_B_2_3;
4276 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
4277 info->data = RXH_IP_SRC | RXH_IP_DST;
4278 }
4279 break;
4280 case TCP_V6_FLOW:
4281 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
4282 info->data = RXH_IP_SRC | RXH_IP_DST |
4283 RXH_L4_B_0_1 | RXH_L4_B_2_3;
4284 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
4285 info->data = RXH_IP_SRC | RXH_IP_DST;
4286 }
4287 break;
4288 case UDP_V4_FLOW:
4289 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
4290 info->data = RXH_IP_SRC | RXH_IP_DST |
4291 RXH_L4_B_0_1 | RXH_L4_B_2_3;
4292 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
4293 info->data = RXH_IP_SRC | RXH_IP_DST;
4294 }
4295 break;
4296 case UDP_V6_FLOW:
4297 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
4298 info->data = RXH_IP_SRC | RXH_IP_DST |
4299 RXH_L4_B_0_1 | RXH_L4_B_2_3;
4300 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
4301 info->data = RXH_IP_SRC | RXH_IP_DST;
4302 }
4303 break;
4304 case IPV4_FLOW:
4305 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4)
4306 info->data = RXH_IP_SRC | RXH_IP_DST;
4307
4308 break;
4309 case IPV6_FLOW:
4310 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6)
4311 info->data = RXH_IP_SRC | RXH_IP_DST;
4312
4313 break;
4314 default:
4315 info->data = 0;
4316 break;
4317 }
4318
4319 return 0;
4320}
4321
4322static int virtnet_set_hashflow(struct net_device *dev,
4323 const struct ethtool_rxfh_fields *info,
4324 struct netlink_ext_ack *extack)
4325{
4326 struct virtnet_info *vi = netdev_priv(dev);
4327 u32 new_hashtypes = vi->rss_hash_types_saved;
4328 bool is_disable = info->data & RXH_DISCARD;
4329 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3);
4330
4331 /* supports only 'sd', 'sdfn' and 'r' */
4332 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable))
4333 return -EINVAL;
4334
4335 switch (info->flow_type) {
4336 case TCP_V4_FLOW:
4337 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4);
4338 if (!is_disable)
4339 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
4340 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0);
4341 break;
4342 case UDP_V4_FLOW:
4343 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4);
4344 if (!is_disable)
4345 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
4346 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0);
4347 break;
4348 case IPV4_FLOW:
4349 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4;
4350 if (!is_disable)
4351 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4;
4352 break;
4353 case TCP_V6_FLOW:
4354 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6);
4355 if (!is_disable)
4356 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
4357 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0);
4358 break;
4359 case UDP_V6_FLOW:
4360 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6);
4361 if (!is_disable)
4362 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
4363 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0);
4364 break;
4365 case IPV6_FLOW:
4366 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6;
4367 if (!is_disable)
4368 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6;
4369 break;
4370 default:
4371 /* unsupported flow */
4372 return -EINVAL;
4373 }
4374
4375 /* if unsupported hashtype was set */
4376 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported))
4377 return -EINVAL;
4378
4379 if (new_hashtypes != vi->rss_hash_types_saved) {
4380 vi->rss_hash_types_saved = new_hashtypes;
4381 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved);
4382 if (vi->dev->features & NETIF_F_RXHASH)
4383 if (!virtnet_commit_rss_command(vi))
4384 return -EINVAL;
4385 }
4386
4387 return 0;
4388}
4389
4390static void virtnet_get_drvinfo(struct net_device *dev,
4391 struct ethtool_drvinfo *info)
4392{
4393 struct virtnet_info *vi = netdev_priv(dev);
4394 struct virtio_device *vdev = vi->vdev;
4395
4396 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
4397 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
4398 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
4399
4400}
4401
4402/* TODO: Eliminate OOO packets during switching */
4403static int virtnet_set_channels(struct net_device *dev,
4404 struct ethtool_channels *channels)
4405{
4406 struct virtnet_info *vi = netdev_priv(dev);
4407 u16 queue_pairs = channels->combined_count;
4408 int err;
4409
4410 /* We don't support separate rx/tx channels.
4411 * We don't allow setting 'other' channels.
4412 */
4413 if (channels->rx_count || channels->tx_count || channels->other_count)
4414 return -EINVAL;
4415
4416 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
4417 return -EINVAL;
4418
4419 /* For now we don't support modifying channels while XDP is loaded.
4420 * Also, when XDP is loaded all RX queues have XDP programs, so we only
4421 * need to check a single RX queue.
4422 */
4423 if (vi->rq[0].xdp_prog)
4424 return -EINVAL;
4425
4426 cpus_read_lock();
4427 err = virtnet_set_queues(vi, queue_pairs);
4428 if (err) {
4429 cpus_read_unlock();
4430 goto err;
4431 }
4432 virtnet_set_affinity(vi);
4433 cpus_read_unlock();
4434
4435 netif_set_real_num_tx_queues(dev, txq: queue_pairs);
4436 netif_set_real_num_rx_queues(dev, rxq: queue_pairs);
4437 err:
4438 return err;
4439}
4440
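/* Emit one formatted stat name per descriptor; @noq_fmt (without a queue
 * index) is used for the queue-independent total fields (qid < 0).
 */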
4441static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt,
4442 int num, int qid, const struct virtnet_stat_desc *desc)
4443{
4444 int i;
4445
4446 if (qid < 0) {
4447 for (i = 0; i < num; ++i)
4448 ethtool_sprintf(data: p, fmt: noq_fmt, desc[i].desc);
4449 } else {
4450 for (i = 0; i < num; ++i)
4451 ethtool_sprintf(data: p, fmt, qid, desc[i].desc);
4452 }
4453}
4454
4455/* qid == -1: for rx/tx queue total field */
4456static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data)
4457{
4458 const struct virtnet_stat_desc *desc;
4459 const char *fmt, *noq_fmt;
4460 u8 *p = *data;
4461 u32 num;
4462
4463 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) {
4464 noq_fmt = "cq_hw_%s";
4465
4466 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
4467 desc = &virtnet_stats_cvq_desc[0];
4468 num = ARRAY_SIZE(virtnet_stats_cvq_desc);
4469
4470 virtnet_stats_sprintf(p: &p, NULL, noq_fmt, num, qid: -1, desc);
4471 }
4472 }
4473
4474 if (type == VIRTNET_Q_TYPE_RX) {
4475 fmt = "rx%u_%s";
4476 noq_fmt = "rx_%s";
4477
4478 desc = &virtnet_rq_stats_desc[0];
4479 num = ARRAY_SIZE(virtnet_rq_stats_desc);
4480
4481 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4482
4483 fmt = "rx%u_hw_%s";
4484 noq_fmt = "rx_hw_%s";
4485
4486 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4487 desc = &virtnet_stats_rx_basic_desc[0];
4488 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);
4489
4490 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4491 }
4492
4493 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4494 desc = &virtnet_stats_rx_csum_desc[0];
4495 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);
4496
4497 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4498 }
4499
4500 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4501 desc = &virtnet_stats_rx_speed_desc[0];
4502 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);
4503
4504 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4505 }
4506 }
4507
4508 if (type == VIRTNET_Q_TYPE_TX) {
4509 fmt = "tx%u_%s";
4510 noq_fmt = "tx_%s";
4511
4512 desc = &virtnet_sq_stats_desc[0];
4513 num = ARRAY_SIZE(virtnet_sq_stats_desc);
4514
4515 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4516
4517 fmt = "tx%u_hw_%s";
4518 noq_fmt = "tx_hw_%s";
4519
4520 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4521 desc = &virtnet_stats_tx_basic_desc[0];
4522 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);
4523
4524 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4525 }
4526
4527 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4528 desc = &virtnet_stats_tx_gso_desc[0];
4529 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);
4530
4531 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4532 }
4533
4534 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4535 desc = &virtnet_stats_tx_speed_desc[0];
4536 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);
4537
4538 virtnet_stats_sprintf(p: &p, fmt, noq_fmt, num, qid, desc);
4539 }
4540 }
4541
4542 *data = p;
4543}
4544
4545struct virtnet_stats_ctx {
4546 /* Whether the stats are written to qstats (true) or to ethtool -S (false). */
4547 bool to_qstat;
4548
4549 /* Used to calculate the offset inside the output buffer. */
4550 u32 desc_num[3];
4551
4552 /* The actual supported stat types. */
4553 u64 bitmap[3];
4554
4555 /* Used to calculate the reply buffer size. */
4556 u32 size[3];
4557
4558 /* Record the output buffer. */
4559 u64 *data;
4560};
4561
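/* Populate the stats context: per queue type, record how many descriptors go
 * into the output buffer, which device stat types are supported and how large
 * the device reply will be. The layout differs between qstats and ethtool -S.
 */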
4562static void virtnet_stats_ctx_init(struct virtnet_info *vi,
4563 struct virtnet_stats_ctx *ctx,
4564 u64 *data, bool to_qstat)
4565{
4566 u32 queue_type;
4567
4568 ctx->data = data;
4569 ctx->to_qstat = to_qstat;
4570
4571 if (to_qstat) {
4572 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
4573 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);
4574
4575 queue_type = VIRTNET_Q_TYPE_RX;
4576
4577 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4578 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
4579 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
4580 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);
4581 }
4582
4583 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4584 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
4585 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
4586 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);
4587 }
4588
4589 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
4590 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO;
4591 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
4592 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso);
4593 }
4594
4595 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4596 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
4597 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
4598 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);
4599 }
4600
4601 queue_type = VIRTNET_Q_TYPE_TX;
4602
4603 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4604 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
4605 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
4606 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);
4607 }
4608
4609 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
4610 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM;
4611 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
4612 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum);
4613 }
4614
4615 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4616 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
4617 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
4618 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);
4619 }
4620
4621 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4622 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
4623 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
4624 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);
4625 }
4626
4627 return;
4628 }
4629
4630 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc);
4631 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc);
4632
4633 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
4634 queue_type = VIRTNET_Q_TYPE_CQ;
4635
4636 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ;
4637 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_cvq_desc);
4638 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq);
4639 }
4640
4641 queue_type = VIRTNET_Q_TYPE_RX;
4642
4643 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4644 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
4645 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc);
4646 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);
4647 }
4648
4649 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4650 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
4651 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc);
4652 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);
4653 }
4654
4655 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4656 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
4657 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc);
4658 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);
4659 }
4660
4661 queue_type = VIRTNET_Q_TYPE_TX;
4662
4663 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4664 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
4665 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc);
4666 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);
4667 }
4668
4669 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4670 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
4671 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc);
4672 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);
4673 }
4674
4675 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4676 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
4677 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc);
4678 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);
4679 }
4680}
4681
4682/* stats_sum_queue - Sum each field across all send or receive queues.
4683 * @sum: where to store the summed values
4684 * @num: number of fields per queue
4685 * @q_value: pointer to the fields of the first queue
4686 * @q_num: number of queues
4687 */
4688static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num)
4689{
4690 u32 step = num;
4691 int i, j;
4692 u64 *p;
4693
4694 for (i = 0; i < num; ++i) {
4695 p = sum + i;
4696 *p = 0;
4697
4698 for (j = 0; j < q_num; ++j)
4699 *p += *(q_value + i + j * step);
4700 }
4701}
4702
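/* Fill the leading "rx_"/"tx_" total fields of the ethtool -S output by
 * summing the corresponding per-queue counters.
 */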
4703static void virtnet_fill_total_fields(struct virtnet_info *vi,
4704 struct virtnet_stats_ctx *ctx)
4705{
4706 u64 *data, *first_rx_q, *first_tx_q;
4707 u32 num_cq, num_rx, num_tx;
4708
4709 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
4710 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
4711 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];
4712
4713 first_rx_q = ctx->data + num_rx + num_tx + num_cq;
4714 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx;
4715
4716 data = ctx->data;
4717
4718 stats_sum_queue(sum: data, num: num_rx, q_value: first_rx_q, q_num: vi->curr_queue_pairs);
4719
4720 data = ctx->data + num_rx;
4721
4722 stats_sum_queue(sum: data, num: num_tx, q_value: first_tx_q, q_num: vi->curr_queue_pairs);
4723}
4724
4725static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid,
4726 struct virtnet_stats_ctx *ctx,
4727 const u8 *base, bool drv_stats, u8 reply_type)
4728{
4729 const struct virtnet_stat_desc *desc;
4730 const u64_stats_t *v_stat;
4731 u64 offset, bitmap;
4732 const __le64 *v;
4733 u32 queue_type;
4734 int i, num;
4735
4736 queue_type = vq_type(vi, qid);
4737 bitmap = ctx->bitmap[queue_type];
4738
4739 if (drv_stats) {
4740 if (queue_type == VIRTNET_Q_TYPE_RX) {
4741 desc = &virtnet_rq_stats_desc_qstat[0];
4742 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
4743 } else {
4744 desc = &virtnet_sq_stats_desc_qstat[0];
4745 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);
4746 }
4747
4748 for (i = 0; i < num; ++i) {
4749 offset = desc[i].qstat_offset / sizeof(*ctx->data);
4750 v_stat = (const u64_stats_t *)(base + desc[i].offset);
4751 ctx->data[offset] = u64_stats_read(p: v_stat);
4752 }
4753 return;
4754 }
4755
4756 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4757 desc = &virtnet_stats_rx_basic_desc_qstat[0];
4758 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
4759 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
4760 goto found;
4761 }
4762
4763 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4764 desc = &virtnet_stats_rx_csum_desc_qstat[0];
4765 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
4766 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
4767 goto found;
4768 }
4769
4770 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
4771 desc = &virtnet_stats_rx_gso_desc_qstat[0];
4772 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
4773 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO)
4774 goto found;
4775 }
4776
4777 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4778 desc = &virtnet_stats_rx_speed_desc_qstat[0];
4779 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
4780 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
4781 goto found;
4782 }
4783
4784 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4785 desc = &virtnet_stats_tx_basic_desc_qstat[0];
4786 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
4787 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
4788 goto found;
4789 }
4790
4791 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
4792 desc = &virtnet_stats_tx_csum_desc_qstat[0];
4793 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
4794 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM)
4795 goto found;
4796 }
4797
4798 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4799 desc = &virtnet_stats_tx_gso_desc_qstat[0];
4800 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
4801 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
4802 goto found;
4803 }
4804
4805 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4806 desc = &virtnet_stats_tx_speed_desc_qstat[0];
4807 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
4808 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
4809 goto found;
4810 }
4811
4812 return;
4813
4814found:
4815 for (i = 0; i < num; ++i) {
4816 offset = desc[i].qstat_offset / sizeof(*ctx->data);
4817 v = (const __le64 *)(base + desc[i].offset);
4818 ctx->data[offset] = le64_to_cpu(*v);
4819 }
4820}
4821
4822/* virtnet_fill_stats - copy the stats to qstats or ethtool -S
4823 * The stats source is the device or the driver.
4824 *
4825 * @vi: virtio net info
4826 * @qid: the vq id
4827 * @ctx: stats ctx (initialized by virtnet_stats_ctx_init())
4828 * @base: pointer to the device reply or the driver stats structure
4829 * @drv_stats: designates the base type (true: driver stats, false: device reply)
4830 * @reply_type: the type of the device reply (must be zero if drv_stats is true)
4831 */
4832static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid,
4833 struct virtnet_stats_ctx *ctx,
4834 const u8 *base, bool drv_stats, u8 reply_type)
4835{
4836 u32 queue_type, num_rx, num_tx, num_cq;
4837 const struct virtnet_stat_desc *desc;
4838 const u64_stats_t *v_stat;
4839 u64 offset, bitmap;
4840 const __le64 *v;
4841 int i, num;
4842
4843 if (ctx->to_qstat)
4844 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type);
4845
4846 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
4847 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
4848 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];
4849
4850 queue_type = vq_type(vi, qid);
4851 bitmap = ctx->bitmap[queue_type];
4852
4853 /* skip the total fields of pairs */
4854 offset = num_rx + num_tx;
4855
4856 if (queue_type == VIRTNET_Q_TYPE_TX) {
4857 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2);
4858
4859 num = ARRAY_SIZE(virtnet_sq_stats_desc);
4860 if (drv_stats) {
4861 desc = &virtnet_sq_stats_desc[0];
4862 goto drv_stats;
4863 }
4864
4865 offset += num;
4866
4867 } else if (queue_type == VIRTNET_Q_TYPE_RX) {
4868 offset += num_cq + num_rx * (qid / 2);
4869
4870 num = ARRAY_SIZE(virtnet_rq_stats_desc);
4871 if (drv_stats) {
4872 desc = &virtnet_rq_stats_desc[0];
4873 goto drv_stats;
4874 }
4875
4876 offset += num;
4877 }
4878
4879 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) {
4880 desc = &virtnet_stats_cvq_desc[0];
4881 num = ARRAY_SIZE(virtnet_stats_cvq_desc);
4882 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ)
4883 goto found;
4884
4885 offset += num;
4886 }
4887
4888 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4889 desc = &virtnet_stats_rx_basic_desc[0];
4890 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);
4891 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
4892 goto found;
4893
4894 offset += num;
4895 }
4896
4897 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4898 desc = &virtnet_stats_rx_csum_desc[0];
4899 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);
4900 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
4901 goto found;
4902
4903 offset += num;
4904 }
4905
4906 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4907 desc = &virtnet_stats_rx_speed_desc[0];
4908 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);
4909 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
4910 goto found;
4911
4912 offset += num;
4913 }
4914
4915 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4916 desc = &virtnet_stats_tx_basic_desc[0];
4917 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);
4918 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
4919 goto found;
4920
4921 offset += num;
4922 }
4923
4924 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4925 desc = &virtnet_stats_tx_gso_desc[0];
4926 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);
4927 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
4928 goto found;
4929
4930 offset += num;
4931 }
4932
4933 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4934 desc = &virtnet_stats_tx_speed_desc[0];
4935 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);
4936 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
4937 goto found;
4938
4939 offset += num;
4940 }
4941
4942 return;
4943
4944found:
4945 for (i = 0; i < num; ++i) {
4946 v = (const __le64 *)(base + desc[i].offset);
4947 ctx->data[offset + i] = le64_to_cpu(*v);
4948 }
4949
4950 return;
4951
4952drv_stats:
4953 for (i = 0; i < num; ++i) {
4954 v_stat = (const u64_stats_t *)(base + desc[i].offset);
4955 ctx->data[offset + i] = u64_stats_read(p: v_stat);
4956 }
4957}
4958
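/* Send a VIRTIO_NET_CTRL_STATS_GET request and parse the reply: each per-vq
 * block is identified by its header and copied into the output buffer via
 * virtnet_fill_stats().
 */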
4959static int __virtnet_get_hw_stats(struct virtnet_info *vi,
4960 struct virtnet_stats_ctx *ctx,
4961 struct virtio_net_ctrl_queue_stats *req,
4962 int req_size, void *reply, int res_size)
4963{
4964 struct virtio_net_stats_reply_hdr *hdr;
4965 struct scatterlist sgs_in, sgs_out;
4966 void *p;
4967 u32 qid;
4968 int ok;
4969
4970 sg_init_one(&sgs_out, req, req_size);
4971 sg_init_one(&sgs_in, reply, res_size);
4972
4973 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
4974 VIRTIO_NET_CTRL_STATS_GET,
4975 out: &sgs_out, in: &sgs_in);
4976
4977 if (!ok)
4978 return ok;
4979
4980 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) {
4981 hdr = p;
4982 qid = le16_to_cpu(hdr->vq_index);
4983 virtnet_fill_stats(vi, qid, ctx, base: p, drv_stats: false, reply_type: hdr->type);
4984 }
4985
4986 return 0;
4987}
4988
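/* Append one queue-stats request entry for @qid if its queue type has any
 * supported device stats.
 */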
4989static void virtnet_make_stat_req(struct virtnet_info *vi,
4990 struct virtnet_stats_ctx *ctx,
4991 struct virtio_net_ctrl_queue_stats *req,
4992 int qid, int *idx)
4993{
4994 int qtype = vq_type(vi, qid);
4995 u64 bitmap = ctx->bitmap[qtype];
4996
4997 if (!bitmap)
4998 return;
4999
5000 req->stats[*idx].vq_index = cpu_to_le16(qid);
5001 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap);
5002 *idx += 1;
5003}
5004
5005/* qid == -1: get the stats of all vqs.
5006 * qid >= 0: get the stats of the specified vq. This must not be the cvq.
5007 */
5008static int virtnet_get_hw_stats(struct virtnet_info *vi,
5009 struct virtnet_stats_ctx *ctx, int qid)
5010{
5011 int qnum, i, j, res_size, qtype, last_vq, first_vq;
5012 struct virtio_net_ctrl_queue_stats *req;
5013 bool enable_cvq;
5014 void *reply;
5015 int ok;
5016
5017 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
5018 return 0;
5019
5020 if (qid == -1) {
5021 last_vq = vi->curr_queue_pairs * 2 - 1;
5022 first_vq = 0;
5023 enable_cvq = true;
5024 } else {
5025 last_vq = qid;
5026 first_vq = qid;
5027 enable_cvq = false;
5028 }
5029
5030 qnum = 0;
5031 res_size = 0;
5032 for (i = first_vq; i <= last_vq ; ++i) {
5033 qtype = vq_type(vi, qid: i);
5034 if (ctx->bitmap[qtype]) {
5035 ++qnum;
5036 res_size += ctx->size[qtype];
5037 }
5038 }
5039
5040 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) {
5041 res_size += ctx->size[VIRTNET_Q_TYPE_CQ];
5042 qnum += 1;
5043 }
5044
5045 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL);
5046 if (!req)
5047 return -ENOMEM;
5048
5049 reply = kmalloc(res_size, GFP_KERNEL);
5050 if (!reply) {
5051 kfree(objp: req);
5052 return -ENOMEM;
5053 }
5054
5055 j = 0;
5056 for (i = first_vq; i <= last_vq ; ++i)
5057 virtnet_make_stat_req(vi, ctx, req, qid: i, idx: &j);
5058
5059 if (enable_cvq)
5060 virtnet_make_stat_req(vi, ctx, req, qid: vi->max_queue_pairs * 2, idx: &j);
5061
5062 ok = __virtnet_get_hw_stats(vi, ctx, req, req_size: sizeof(*req) * j, reply, res_size);
5063
5064 kfree(objp: req);
5065 kfree(objp: reply);
5066
5067 return ok;
5068}
5069
5070static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
5071{
5072 struct virtnet_info *vi = netdev_priv(dev);
5073 unsigned int i;
5074 u8 *p = data;
5075
5076 switch (stringset) {
5077 case ETH_SS_STATS:
5078 /* Generate the total field names. */
5079 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, qid: -1, data: &p);
5080 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, qid: -1, data: &p);
5081
5082 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, qid: 0, data: &p);
5083
5084 for (i = 0; i < vi->curr_queue_pairs; ++i)
5085 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, qid: i, data: &p);
5086
5087 for (i = 0; i < vi->curr_queue_pairs; ++i)
5088 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, qid: i, data: &p);
5089 break;
5090 }
5091}
5092
5093static int virtnet_get_sset_count(struct net_device *dev, int sset)
5094{
5095 struct virtnet_info *vi = netdev_priv(dev);
5096 struct virtnet_stats_ctx ctx = {0};
5097 u32 pair_count;
5098
5099 switch (sset) {
5100 case ETH_SS_STATS:
5101 virtnet_stats_ctx_init(vi, ctx: &ctx, NULL, to_qstat: false);
5102
5103 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX];
5104
5105 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] +
5106 vi->curr_queue_pairs * pair_count;
5107 default:
5108 return -EOPNOTSUPP;
5109 }
5110}
5111
5112static void virtnet_get_ethtool_stats(struct net_device *dev,
5113 struct ethtool_stats *stats, u64 *data)
5114{
5115 struct virtnet_info *vi = netdev_priv(dev);
5116 struct virtnet_stats_ctx ctx = {0};
5117 unsigned int start, i;
5118 const u8 *stats_base;
5119
5120 virtnet_stats_ctx_init(vi, ctx: &ctx, data, to_qstat: false);
5121 if (virtnet_get_hw_stats(vi, ctx: &ctx, qid: -1))
5122 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n");
5123
5124 for (i = 0; i < vi->curr_queue_pairs; i++) {
5125 struct receive_queue *rq = &vi->rq[i];
5126 struct send_queue *sq = &vi->sq[i];
5127
5128 stats_base = (const u8 *)&rq->stats;
5129 do {
5130 start = u64_stats_fetch_begin(syncp: &rq->stats.syncp);
5131 virtnet_fill_stats(vi, qid: i * 2, ctx: &ctx, base: stats_base, drv_stats: true, reply_type: 0);
5132 } while (u64_stats_fetch_retry(syncp: &rq->stats.syncp, start));
5133
5134 stats_base = (const u8 *)&sq->stats;
5135 do {
5136 start = u64_stats_fetch_begin(syncp: &sq->stats.syncp);
5137 virtnet_fill_stats(vi, qid: i * 2 + 1, ctx: &ctx, base: stats_base, drv_stats: true, reply_type: 0);
5138 } while (u64_stats_fetch_retry(syncp: &sq->stats.syncp, start));
5139 }
5140
5141 virtnet_fill_total_fields(vi, ctx: &ctx);
5142}
5143
5144static void virtnet_get_channels(struct net_device *dev,
5145 struct ethtool_channels *channels)
5146{
5147 struct virtnet_info *vi = netdev_priv(dev);
5148
5149 channels->combined_count = vi->curr_queue_pairs;
5150 channels->max_combined = vi->max_queue_pairs;
5151 channels->max_other = 0;
5152 channels->rx_count = 0;
5153 channels->tx_count = 0;
5154 channels->other_count = 0;
5155}
5156
5157static int virtnet_set_link_ksettings(struct net_device *dev,
5158 const struct ethtool_link_ksettings *cmd)
5159{
5160 struct virtnet_info *vi = netdev_priv(dev);
5161
5162 return ethtool_virtdev_set_link_ksettings(dev, cmd,
5163 dev_speed: &vi->speed, dev_duplex: &vi->duplex);
5164}
5165
5166static int virtnet_get_link_ksettings(struct net_device *dev,
5167 struct ethtool_link_ksettings *cmd)
5168{
5169 struct virtnet_info *vi = netdev_priv(dev);
5170
5171 cmd->base.speed = vi->speed;
5172 cmd->base.duplex = vi->duplex;
5173 cmd->base.port = PORT_OTHER;
5174
5175 return 0;
5176}
5177
5178static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi,
5179 struct ethtool_coalesce *ec)
5180{
5181 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL;
5182 struct scatterlist sgs_tx;
5183 int i;
5184
5185 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL);
5186 if (!coal_tx)
5187 return -ENOMEM;
5188
5189 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
5190 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
5191 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx));
5192
5193 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
5194 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
5195 out: &sgs_tx))
5196 return -EINVAL;
5197
5198 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs;
5199 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames;
5200 for (i = 0; i < vi->max_queue_pairs; i++) {
5201 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs;
5202 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames;
5203 }
5204
5205 return 0;
5206}
5207
5208static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
5209 struct ethtool_coalesce *ec)
5210{
5211 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL;
5212 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
5213 struct scatterlist sgs_rx;
5214 int i;
5215
5216 if (rx_ctrl_dim_on && !virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
5217 return -EOPNOTSUPP;
5218
5219 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs ||
5220 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets))
5221 return -EINVAL;
5222
5223 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) {
5224 vi->rx_dim_enabled = true;
5225 for (i = 0; i < vi->max_queue_pairs; i++) {
5226 mutex_lock(lock: &vi->rq[i].dim_lock);
5227 vi->rq[i].dim_enabled = true;
5228 mutex_unlock(lock: &vi->rq[i].dim_lock);
5229 }
5230 return 0;
5231 }
5232
5233 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL);
5234 if (!coal_rx)
5235 return -ENOMEM;
5236
5237 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) {
5238 vi->rx_dim_enabled = false;
5239 for (i = 0; i < vi->max_queue_pairs; i++) {
5240 mutex_lock(lock: &vi->rq[i].dim_lock);
5241 vi->rq[i].dim_enabled = false;
5242 mutex_unlock(lock: &vi->rq[i].dim_lock);
5243 }
5244 }
5245
5246 /* Since the per-queue coalescing params can be set individually,
5247 * we need to apply the new global params even if they
5248 * have not changed.
5249 */
5250 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
5251 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
5252 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx));
5253
5254 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
5255 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
5256 out: &sgs_rx))
5257 return -EINVAL;
5258
5259 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
5260 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
5261 for (i = 0; i < vi->max_queue_pairs; i++) {
5262 mutex_lock(lock: &vi->rq[i].dim_lock);
5263 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
5264 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
5265 mutex_unlock(lock: &vi->rq[i].dim_lock);
5266 }
5267
5268 return 0;
5269}
5270
5271static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
5272 struct ethtool_coalesce *ec)
5273{
5274 int err;
5275
5276 err = virtnet_send_tx_notf_coal_cmds(vi, ec);
5277 if (err)
5278 return err;
5279
5280 err = virtnet_send_rx_notf_coal_cmds(vi, ec);
5281 if (err)
5282 return err;
5283
5284 return 0;
5285}
5286
5287static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi,
5288 struct ethtool_coalesce *ec,
5289 u16 queue)
5290{
5291 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
5292 u32 max_usecs, max_packets;
5293 bool cur_rx_dim;
5294 int err;
5295
5296 mutex_lock(lock: &vi->rq[queue].dim_lock);
5297 cur_rx_dim = vi->rq[queue].dim_enabled;
5298 max_usecs = vi->rq[queue].intr_coal.max_usecs;
5299 max_packets = vi->rq[queue].intr_coal.max_packets;
5300
5301 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs ||
5302 ec->rx_max_coalesced_frames != max_packets)) {
5303 mutex_unlock(lock: &vi->rq[queue].dim_lock);
5304 return -EINVAL;
5305 }
5306
5307 if (rx_ctrl_dim_on && !cur_rx_dim) {
5308 vi->rq[queue].dim_enabled = true;
5309 mutex_unlock(lock: &vi->rq[queue].dim_lock);
5310 return 0;
5311 }
5312
5313 if (!rx_ctrl_dim_on && cur_rx_dim)
5314 vi->rq[queue].dim_enabled = false;
5315
5316 /* If no params are updated, userspace ethtool will
5317 * reject the modification.
5318 */
5319 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue,
5320 max_usecs: ec->rx_coalesce_usecs,
5321 max_packets: ec->rx_max_coalesced_frames);
5322 mutex_unlock(lock: &vi->rq[queue].dim_lock);
5323 return err;
5324}
5325
5326static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
5327 struct ethtool_coalesce *ec,
5328 u16 queue)
5329{
5330 int err;
5331
5332 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue);
5333 if (err)
5334 return err;
5335
5336 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue,
5337 max_usecs: ec->tx_coalesce_usecs,
5338 max_packets: ec->tx_max_coalesced_frames);
5339 if (err)
5340 return err;
5341
5342 return 0;
5343}
5344
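/* DIM worker: if adaptive RX coalescing is enabled for this queue, apply the
 * moderation values suggested by net_dim via the ctrl vq.
 */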
5345static void virtnet_rx_dim_work(struct work_struct *work)
5346{
5347 struct dim *dim = container_of(work, struct dim, work);
5348 struct receive_queue *rq = container_of(dim,
5349 struct receive_queue, dim);
5350 struct virtnet_info *vi = rq->vq->vdev->priv;
5351 struct net_device *dev = vi->dev;
5352 struct dim_cq_moder update_moder;
5353 int qnum, err;
5354
5355 qnum = rq - vi->rq;
5356
5357 mutex_lock(lock: &rq->dim_lock);
5358 if (!rq->dim_enabled)
5359 goto out;
5360
5361 update_moder = net_dim_get_rx_irq_moder(dev, dim);
5362 if (update_moder.usec != rq->intr_coal.max_usecs ||
5363 update_moder.pkts != rq->intr_coal.max_packets) {
5364 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue: qnum,
5365 max_usecs: update_moder.usec,
5366 max_packets: update_moder.pkts);
5367 if (err)
5368 pr_debug("%s: Failed to send dim parameters on rxq%d\n",
5369 dev->name, qnum);
5370 }
5371out:
5372 dim->state = DIM_START_MEASURE;
5373 mutex_unlock(lock: &rq->dim_lock);
5374}
5375
5376static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
5377{
5378 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
5379 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated.
5380 */
5381 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
5382 return -EOPNOTSUPP;
5383
5384 if (ec->tx_max_coalesced_frames > 1 ||
5385 ec->rx_max_coalesced_frames != 1)
5386 return -EINVAL;
5387
5388 return 0;
5389}
5390
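/* The NAPI weight may only be changed while the interface is down; refuse
 * with -EBUSY if the device is up and the requested weight differs.
 */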
5391static int virtnet_should_update_vq_weight(int dev_flags, int weight,
5392 int vq_weight, bool *should_update)
5393{
5394 if (weight ^ vq_weight) {
5395 if (dev_flags & IFF_UP)
5396 return -EBUSY;
5397 *should_update = true;
5398 }
5399
5400 return 0;
5401}
5402
5403static int virtnet_set_coalesce(struct net_device *dev,
5404 struct ethtool_coalesce *ec,
5405 struct kernel_ethtool_coalesce *kernel_coal,
5406 struct netlink_ext_ack *extack)
5407{
5408 struct virtnet_info *vi = netdev_priv(dev);
5409 int ret, queue_number, napi_weight, i;
5410 bool update_napi = false;
5411
5412 /* Can't change NAPI weight if the link is up */
5413 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
5414 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) {
5415 ret = virtnet_should_update_vq_weight(dev_flags: dev->flags, weight: napi_weight,
5416 vq_weight: vi->sq[queue_number].napi.weight,
5417 should_update: &update_napi);
5418 if (ret)
5419 return ret;
5420
5421 if (update_napi) {
5422 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be
5423 * updated for the sake of simplicity, which might not be necessary
5424 */
5425 break;
5426 }
5427 }
5428
5429 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_NOTF_COAL))
5430 ret = virtnet_send_notf_coal_cmds(vi, ec);
5431 else
5432 ret = virtnet_coal_params_supported(ec);
5433
5434 if (ret)
5435 return ret;
5436
5437 if (update_napi) {
5438 /* xsk xmit depends on the tx napi. So if xsk is active,
5439 * prevent modifications to tx napi.
5440 */
5441 for (i = queue_number; i < vi->max_queue_pairs; i++) {
5442 if (vi->sq[i].xsk_pool)
5443 return -EBUSY;
5444 }
5445
5446 for (; queue_number < vi->max_queue_pairs; queue_number++)
5447 vi->sq[queue_number].napi.weight = napi_weight;
5448 }
5449
5450 return ret;
5451}
5452
5453static int virtnet_get_coalesce(struct net_device *dev,
5454 struct ethtool_coalesce *ec,
5455 struct kernel_ethtool_coalesce *kernel_coal,
5456 struct netlink_ext_ack *extack)
5457{
5458 struct virtnet_info *vi = netdev_priv(dev);
5459
5460 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
5461 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs;
5462 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs;
5463 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets;
5464 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets;
5465 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled;
5466 } else {
5467 ec->rx_max_coalesced_frames = 1;
5468
5469 if (vi->sq[0].napi.weight)
5470 ec->tx_max_coalesced_frames = 1;
5471 }
5472
5473 return 0;
5474}
5475
5476static int virtnet_set_per_queue_coalesce(struct net_device *dev,
5477 u32 queue,
5478 struct ethtool_coalesce *ec)
5479{
5480 struct virtnet_info *vi = netdev_priv(dev);
5481 int ret, napi_weight;
5482 bool update_napi = false;
5483
5484 if (queue >= vi->max_queue_pairs)
5485 return -EINVAL;
5486
5487 /* Can't change NAPI weight if the link is up */
5488 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
5489 ret = virtnet_should_update_vq_weight(dev_flags: dev->flags, weight: napi_weight,
5490 vq_weight: vi->sq[queue].napi.weight,
5491 should_update: &update_napi);
5492 if (ret)
5493 return ret;
5494
5495 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
5496 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue);
5497 else
5498 ret = virtnet_coal_params_supported(ec);
5499
5500 if (ret)
5501 return ret;
5502
5503 if (update_napi)
5504 vi->sq[queue].napi.weight = napi_weight;
5505
5506 return 0;
5507}
5508
5509static int virtnet_get_per_queue_coalesce(struct net_device *dev,
5510 u32 queue,
5511 struct ethtool_coalesce *ec)
5512{
5513 struct virtnet_info *vi = netdev_priv(dev);
5514
5515 if (queue >= vi->max_queue_pairs)
5516 return -EINVAL;
5517
5518 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
5519 mutex_lock(lock: &vi->rq[queue].dim_lock);
5520 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs;
5521 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs;
5522 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets;
5523 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets;
5524 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled;
5525 mutex_unlock(lock: &vi->rq[queue].dim_lock);
5526 } else {
5527 ec->rx_max_coalesced_frames = 1;
5528
5529 if (vi->sq[queue].napi.weight)
5530 ec->tx_max_coalesced_frames = 1;
5531 }
5532
5533 return 0;
5534}
5535
5536static void virtnet_init_settings(struct net_device *dev)
5537{
5538 struct virtnet_info *vi = netdev_priv(dev);
5539
5540 vi->speed = SPEED_UNKNOWN;
5541 vi->duplex = DUPLEX_UNKNOWN;
5542}
5543
5544static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
5545{
5546 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
5547}
5548
5549static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
5550{
5551 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
5552}
5553
5554static int virtnet_get_rxfh(struct net_device *dev,
5555 struct ethtool_rxfh_param *rxfh)
5556{
5557 struct virtnet_info *vi = netdev_priv(dev);
5558 int i;
5559
5560 if (rxfh->indir) {
5561 for (i = 0; i < vi->rss_indir_table_size; ++i)
5562 rxfh->indir[i] = le16_to_cpu(vi->rss_hdr->indirection_table[i]);
5563 }
5564
5565 if (rxfh->key)
5566 memcpy(to: rxfh->key, from: vi->rss_hash_key_data, len: vi->rss_key_size);
5567
5568 rxfh->hfunc = ETH_RSS_HASH_TOP;
5569
5570 return 0;
5571}
5572
5573static int virtnet_set_rxfh(struct net_device *dev,
5574 struct ethtool_rxfh_param *rxfh,
5575 struct netlink_ext_ack *extack)
5576{
5577 struct virtnet_info *vi = netdev_priv(dev);
5578 bool update = false;
5579 int i;
5580
5581 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
5582 rxfh->hfunc != ETH_RSS_HASH_TOP)
5583 return -EOPNOTSUPP;
5584
5585 if (rxfh->indir) {
5586 if (!vi->has_rss)
5587 return -EOPNOTSUPP;
5588
5589 for (i = 0; i < vi->rss_indir_table_size; ++i)
5590 vi->rss_hdr->indirection_table[i] = cpu_to_le16(rxfh->indir[i]);
5591 update = true;
5592 }
5593
5594 if (rxfh->key) {
5595 /* If either _F_HASH_REPORT or _F_RSS is negotiated, the
5596 * device provides hash calculation capabilities, that is,
5597 * the hash key can be configured.
5598 */
5599 if (!vi->has_rss && !vi->has_rss_hash_report)
5600 return -EOPNOTSUPP;
5601
5602 memcpy(to: vi->rss_hash_key_data, from: rxfh->key, len: vi->rss_key_size);
5603 update = true;
5604 }
5605
5606 if (update)
5607 virtnet_commit_rss_command(vi);
5608
5609 return 0;
5610}
5611
5612static u32 virtnet_get_rx_ring_count(struct net_device *dev)
5613{
5614 struct virtnet_info *vi = netdev_priv(dev);
5615
5616 return vi->curr_queue_pairs;
5617}
5618
5619static const struct ethtool_ops virtnet_ethtool_ops = {
5620 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
5621 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
5622 .get_drvinfo = virtnet_get_drvinfo,
5623 .get_link = ethtool_op_get_link,
5624 .get_ringparam = virtnet_get_ringparam,
5625 .set_ringparam = virtnet_set_ringparam,
5626 .get_strings = virtnet_get_strings,
5627 .get_sset_count = virtnet_get_sset_count,
5628 .get_ethtool_stats = virtnet_get_ethtool_stats,
5629 .set_channels = virtnet_set_channels,
5630 .get_channels = virtnet_get_channels,
5631 .get_ts_info = ethtool_op_get_ts_info,
5632 .get_link_ksettings = virtnet_get_link_ksettings,
5633 .set_link_ksettings = virtnet_set_link_ksettings,
5634 .set_coalesce = virtnet_set_coalesce,
5635 .get_coalesce = virtnet_get_coalesce,
5636 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce,
5637 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce,
5638 .get_rxfh_key_size = virtnet_get_rxfh_key_size,
5639 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size,
5640 .get_rxfh = virtnet_get_rxfh,
5641 .set_rxfh = virtnet_set_rxfh,
5642 .get_rxfh_fields = virtnet_get_hashflow,
5643 .set_rxfh_fields = virtnet_set_hashflow,
5644 .get_rx_ring_count = virtnet_get_rx_ring_count,
5645};
5646
5647static void virtnet_get_queue_stats_rx(struct net_device *dev, int i,
5648 struct netdev_queue_stats_rx *stats)
5649{
5650 struct virtnet_info *vi = netdev_priv(dev);
5651 struct receive_queue *rq = &vi->rq[i];
5652 struct virtnet_stats_ctx ctx = {0};
5653
	virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);

	virtnet_get_hw_stats(vi, &ctx, i * 2);
	virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0);
5658}
5659
5660static void virtnet_get_queue_stats_tx(struct net_device *dev, int i,
5661 struct netdev_queue_stats_tx *stats)
5662{
5663 struct virtnet_info *vi = netdev_priv(dev);
5664 struct send_queue *sq = &vi->sq[i];
5665 struct virtnet_stats_ctx ctx = {0};
5666
	virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);

	virtnet_get_hw_stats(vi, &ctx, i * 2 + 1);
	virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0);
5671}
5672
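/* Base stats for the netdev qstats API. Device counters are never reset, so
 * every supported field starts at zero; netdev_stat_queue_sum() then folds in
 * the queues above real_num_{rx,tx}_queues so totals stay consistent when the
 * channel count is reduced.
 */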
5673static void virtnet_get_base_stats(struct net_device *dev,
5674 struct netdev_queue_stats_rx *rx,
5675 struct netdev_queue_stats_tx *tx)
5676{
5677 struct virtnet_info *vi = netdev_priv(dev);
5678
	/* The queue stats of a virtio-net device are never reset, so the
	 * base values reported here are all zero.
	 */
5682 rx->bytes = 0;
5683 rx->packets = 0;
5684
5685 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
5686 rx->hw_drops = 0;
5687 rx->hw_drop_overruns = 0;
5688 }
5689
5690 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
5691 rx->csum_unnecessary = 0;
5692 rx->csum_none = 0;
5693 rx->csum_bad = 0;
5694 }
5695
5696 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
5697 rx->hw_gro_packets = 0;
5698 rx->hw_gro_bytes = 0;
5699 rx->hw_gro_wire_packets = 0;
5700 rx->hw_gro_wire_bytes = 0;
5701 }
5702
5703 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED)
5704 rx->hw_drop_ratelimits = 0;
5705
5706 tx->bytes = 0;
5707 tx->packets = 0;
5708 tx->stop = 0;
5709 tx->wake = 0;
5710
5711 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
5712 tx->hw_drops = 0;
5713 tx->hw_drop_errors = 0;
5714 }
5715
5716 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
5717 tx->csum_none = 0;
5718 tx->needs_csum = 0;
5719 }
5720
5721 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
5722 tx->hw_gso_packets = 0;
5723 tx->hw_gso_bytes = 0;
5724 tx->hw_gso_wire_packets = 0;
5725 tx->hw_gso_wire_bytes = 0;
5726 }
5727
5728 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED)
5729 tx->hw_drop_ratelimits = 0;
5730
	netdev_stat_queue_sum(dev,
			      dev->real_num_rx_queues, vi->max_queue_pairs, rx,
			      dev->real_num_tx_queues, vi->max_queue_pairs, tx);
5734}
5735
5736static const struct netdev_stat_ops virtnet_stat_ops = {
5737 .get_queue_stats_rx = virtnet_get_queue_stats_rx,
5738 .get_queue_stats_tx = virtnet_get_queue_stats_tx,
5739 .get_base_stats = virtnet_get_base_stats,
5740};
5741
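/* Quiesce the device before suspend: flush the config and rx-mode work,
 * close the interface if it is running and detach it from the stack.
 */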
5742static void virtnet_freeze_down(struct virtio_device *vdev)
5743{
5744 struct virtnet_info *vi = vdev->priv;
5745
5746 /* Make sure no work handler is accessing the device */
	flush_work(&vi->config_work);
	disable_rx_mode_work(vi);
	flush_work(&vi->rx_mode_work);

	if (netif_running(vi->dev)) {
		rtnl_lock();
		virtnet_close(vi->dev);
		rtnl_unlock();
	}

	netif_tx_lock_bh(vi->dev);
	netif_device_detach(vi->dev);
	netif_tx_unlock_bh(vi->dev);
5760}
5761
5762static int init_vqs(struct virtnet_info *vi);
5763
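/* Rebuild the virtqueues and bring the interface back up after restore. */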
5764static int virtnet_restore_up(struct virtio_device *vdev)
5765{
5766 struct virtnet_info *vi = vdev->priv;
5767 int err;
5768
5769 err = init_vqs(vi);
5770 if (err)
5771 return err;
5772
	virtio_device_ready(vdev);

	enable_delayed_refill(vi);
	enable_rx_mode_work(vi);

	if (netif_running(vi->dev)) {
		rtnl_lock();
		err = virtnet_open(vi->dev);
		rtnl_unlock();
		if (err)
			return err;
	}

	netif_tx_lock_bh(vi->dev);
	netif_device_attach(vi->dev);
	netif_tx_unlock_bh(vi->dev);
5789 return err;
5790}
5791
5792static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
5793{
5794 __virtio64 *_offloads __free(kfree) = NULL;
5795 struct scatterlist sg;
5796
5797 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL);
5798 if (!_offloads)
5799 return -ENOMEM;
5800
	*_offloads = cpu_to_virtio64(vi->vdev, offloads);

	sg_init_one(&sg, _offloads, sizeof(*_offloads));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
				  VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
		dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
		return -EINVAL;
5809 }
5810
5811 return 0;
5812}
5813
5814static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
5815{
5816 u64 offloads = 0;
5817
5818 if (!vi->guest_offloads)
5819 return 0;
5820
5821 return virtnet_set_guest_offloads(vi, offloads);
5822}
5823
5824static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
5825{
5826 u64 offloads = vi->guest_offloads;
5827
5828 if (!vi->guest_offloads)
5829 return 0;
5830
5831 return virtnet_set_guest_offloads(vi, offloads);
5832}
5833
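/* Attach (pool != NULL) or detach (pool == NULL) an XSK buffer pool on an RX
 * queue: pause the queue, reset its virtqueue so pending buffers are
 * recycled, then resume it with the new pool in place.
 */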
5834static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq,
5835 struct xsk_buff_pool *pool)
5836{
5837 int err, qindex;
5838
5839 qindex = rq - vi->rq;
5840
5841 if (pool) {
		err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id);
		if (err < 0)
			return err;

		err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info,
						 MEM_TYPE_XSK_BUFF_POOL, NULL);
		if (err < 0)
			goto unreg;

		xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info);
	}

	virtnet_rx_pause(vi, rq);

	err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf, NULL);
	if (err) {
		netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err);
5859
5860 pool = NULL;
5861 }
5862
5863 rq->xsk_pool = pool;
5864
5865 virtnet_rx_resume(vi, rq);
5866
5867 if (pool)
5868 return 0;
5869
5870unreg:
	xdp_rxq_info_unreg(&rq->xsk_rxq_info);
5872 return err;
5873}
5874
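/* TX counterpart of virtnet_rq_bind_xsk_pool(). */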
5875static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi,
5876 struct send_queue *sq,
5877 struct xsk_buff_pool *pool)
5878{
5879 int err, qindex;
5880
5881 qindex = sq - vi->sq;
5882
5883 virtnet_tx_pause(vi, sq);
5884
	err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf,
			      virtnet_sq_free_unused_buf_done);
	if (err) {
		netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err);
5889 pool = NULL;
5890 }
5891
5892 sq->xsk_pool = pool;
5893
5894 virtnet_tx_resume(vi, sq);
5895
5896 return err;
5897}
5898
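/* AF_XDP zero-copy setup for queue pair @qid: validate headroom and DMA
 * device constraints, DMA-map the pool and a shared all-zero TX header, and
 * bind the pool to both the RX and TX queues.
 */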
5899static int virtnet_xsk_pool_enable(struct net_device *dev,
5900 struct xsk_buff_pool *pool,
5901 u16 qid)
5902{
5903 struct virtnet_info *vi = netdev_priv(dev);
5904 struct receive_queue *rq;
5905 struct device *dma_dev;
5906 struct send_queue *sq;
5907 dma_addr_t hdr_dma;
5908 int err, size;
5909
5910 if (vi->hdr_len > xsk_pool_get_headroom(pool))
5911 return -EINVAL;
5912
	/* In big_packets mode (i.e. without mergeable RX buffers) XDP cannot
	 * work, so there is no need to set up XSK for the rq.
	 */
5916 if (vi->big_packets && !vi->mergeable_rx_bufs)
5917 return -ENOENT;
5918
5919 if (qid >= vi->curr_queue_pairs)
5920 return -EINVAL;
5921
5922 sq = &vi->sq[qid];
5923 rq = &vi->rq[qid];
5924
	/* XSK assumes that RX and TX share one DMA device: AF_XDP may receive
	 * into a buffer and later reuse that same buffer for transmit, so the
	 * DMA device of the sq and the rq must be the same.
	 *
	 * However, vq->dma_dev allows each virtqueue to have its own DMA
	 * device, so verify that the rq and sq really do share one.
	 */
	if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq))
		return -EINVAL;

	dma_dev = virtqueue_dma_dev(rq->vq);
	if (!dma_dev)
		return -EINVAL;

	size = virtqueue_get_vring_size(rq->vq);
5940
5941 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL);
5942 if (!rq->xsk_buffs)
5943 return -ENOMEM;
5944
	hdr_dma = virtqueue_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len,
					     DMA_TO_DEVICE, 0);
	if (virtqueue_map_mapping_error(sq->vq, hdr_dma)) {
		err = -ENOMEM;
		goto err_free_buffs;
	}

	err = xsk_pool_dma_map(pool, dma_dev, 0);
5953 if (err)
5954 goto err_xsk_map;
5955
5956 err = virtnet_rq_bind_xsk_pool(vi, rq, pool);
5957 if (err)
5958 goto err_rq;
5959
5960 err = virtnet_sq_bind_xsk_pool(vi, sq, pool);
5961 if (err)
5962 goto err_sq;
5963
	/* TX offloads (such as TX checksum) are not supported yet, so every
	 * XSK TX virtnet header is all zeros and all TX packets can share a
	 * single header.
	 */
	sq->xsk_hdr_dma_addr = hdr_dma;
5968
5969 return 0;
5970
5971err_sq:
5972 virtnet_rq_bind_xsk_pool(vi, rq, NULL);
5973err_rq:
	xsk_pool_dma_unmap(pool, 0);
err_xsk_map:
	virtqueue_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len,
				     DMA_TO_DEVICE, 0);
err_free_buffs:
	kvfree(rq->xsk_buffs);
5980 return err;
5981}
5982
5983static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid)
5984{
5985 struct virtnet_info *vi = netdev_priv(dev);
5986 struct xsk_buff_pool *pool;
5987 struct receive_queue *rq;
5988 struct send_queue *sq;
5989 int err;
5990
5991 if (qid >= vi->curr_queue_pairs)
5992 return -EINVAL;
5993
5994 sq = &vi->sq[qid];
5995 rq = &vi->rq[qid];
5996
5997 pool = rq->xsk_pool;
5998
5999 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL);
6000 err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL);
6001
	xsk_pool_dma_unmap(pool, 0);

	virtqueue_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr,
				     vi->hdr_len, DMA_TO_DEVICE, 0);
	kvfree(rq->xsk_buffs);
6007
6008 return err;
6009}
6010
6011static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp)
6012{
	if (xdp->xsk.pool)
		return virtnet_xsk_pool_enable(dev, xdp->xsk.pool,
					       xdp->xsk.queue_id);
	else
		return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id);
6018}
6019
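/* Attach or detach an XDP program. Guest offloads (GRO_HW/CSUM) are cleared
 * while a program is installed and restored on removal, and extra TX queues
 * are reserved for XDP_TX when enough queue pairs are available.
 */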
6020static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
6021 struct netlink_ext_ack *extack)
6022{
6023 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
6024 sizeof(struct skb_shared_info));
6025 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
6026 struct virtnet_info *vi = netdev_priv(dev);
6027 struct bpf_prog *old_prog;
6028 u16 xdp_qp = 0, curr_qp;
6029 int i, err;
6030
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
	    && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) {
6039 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
6040 return -EOPNOTSUPP;
6041 }
6042
6043 if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
6044 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
6045 return -EINVAL;
6046 }
6047
6048 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
6049 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
		netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
6051 return -EINVAL;
6052 }
6053
6054 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
6055 if (prog)
6056 xdp_qp = nr_cpu_ids;
6057
6058 /* XDP requires extra queues for XDP_TX */
6059 if (curr_qp + xdp_qp > vi->max_queue_pairs) {
6060 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
6061 curr_qp + xdp_qp, vi->max_queue_pairs);
6062 xdp_qp = 0;
6063 }
6064
6065 old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
6066 if (!prog && !old_prog)
6067 return 0;
6068
6069 if (prog)
		bpf_prog_add(prog, vi->max_queue_pairs - 1);
6071
6072 virtnet_rx_pause_all(vi);
6073
6074 /* Make sure NAPI is not using any XDP TX queues for RX. */
6075 if (netif_running(dev)) {
6076 for (i = 0; i < vi->max_queue_pairs; i++)
			virtnet_napi_tx_disable(&vi->sq[i]);
6078 }
6079
6080 if (!prog) {
6081 for (i = 0; i < vi->max_queue_pairs; i++) {
6082 rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
6083 if (i == 0)
6084 virtnet_restore_guest_offloads(vi);
6085 }
6086 synchronize_net();
6087 }
6088
	err = virtnet_set_queues(vi, curr_qp + xdp_qp);
	if (err)
		goto err;
	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
6093 vi->xdp_queue_pairs = xdp_qp;
6094
6095 if (prog) {
6096 vi->xdp_enabled = true;
6097 for (i = 0; i < vi->max_queue_pairs; i++) {
6098 rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
6099 if (i == 0 && !old_prog)
6100 virtnet_clear_guest_offloads(vi);
6101 }
6102 if (!old_prog)
			xdp_features_set_redirect_target(dev, true);
6104 } else {
6105 xdp_features_clear_redirect_target(dev);
6106 vi->xdp_enabled = false;
6107 }
6108
6109 virtnet_rx_resume_all(vi);
	for (i = 0; i < vi->max_queue_pairs; i++) {
		if (old_prog)
			bpf_prog_put(old_prog);
		if (netif_running(dev))
			virtnet_napi_tx_enable(&vi->sq[i]);
	}
6116
6117 return 0;
6118
6119err:
6120 if (!prog) {
6121 virtnet_clear_guest_offloads(vi);
6122 for (i = 0; i < vi->max_queue_pairs; i++)
6123 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
6124 }
6125
6126 virtnet_rx_resume_all(vi);
	if (netif_running(dev)) {
		for (i = 0; i < vi->max_queue_pairs; i++)
			virtnet_napi_tx_enable(&vi->sq[i]);
	}
	if (prog)
		bpf_prog_sub(prog, vi->max_queue_pairs - 1);
6133 return err;
6134}
6135
6136static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
6137{
6138 switch (xdp->command) {
6139 case XDP_SETUP_PROG:
		return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
6141 case XDP_SETUP_XSK_POOL:
6142 return virtnet_xsk_pool_setup(dev, xdp);
6143 default:
6144 return -EINVAL;
6145 }
6146}
6147
6148static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
6149 size_t len)
6150{
6151 struct virtnet_info *vi = netdev_priv(dev);
6152 int ret;
6153
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
		return -EOPNOTSUPP;

	ret = snprintf(buf, len, "sby");
6158 if (ret >= len)
6159 return -EOPNOTSUPP;
6160
6161 return 0;
6162}
6163
6164static int virtnet_set_features(struct net_device *dev,
6165 netdev_features_t features)
6166{
6167 struct virtnet_info *vi = netdev_priv(dev);
6168 u64 offloads;
6169 int err;
6170
6171 if ((dev->features ^ features) & NETIF_F_GRO_HW) {
6172 if (vi->xdp_enabled)
6173 return -EBUSY;
6174
6175 if (features & NETIF_F_GRO_HW)
6176 offloads = vi->guest_offloads_capable;
6177 else
6178 offloads = vi->guest_offloads_capable &
6179 ~GUEST_OFFLOAD_GRO_HW_MASK;
6180
6181 err = virtnet_set_guest_offloads(vi, offloads);
6182 if (err)
6183 return err;
6184 vi->guest_offloads = offloads;
6185 }
6186
6187 if ((dev->features ^ features) & NETIF_F_RXHASH) {
6188 if (features & NETIF_F_RXHASH)
6189 vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved);
6190 else
6191 vi->rss_hdr->hash_types = cpu_to_le32(VIRTIO_NET_HASH_REPORT_NONE);
6192
6193 if (!virtnet_commit_rss_command(vi))
6194 return -EINVAL;
6195 }
6196
6197 return 0;
6198}
6199
6200static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
6201{
6202 struct virtnet_info *priv = netdev_priv(dev);
6203 struct send_queue *sq = &priv->sq[txqueue];
	struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_inc(&sq->stats.tx_timeouts);
	u64_stats_update_end(&sq->stats.syncp);

	netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
		   txqueue, sq->name, sq->vq->index, sq->vq->name,
		   jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
6213}
6214
6215static int virtnet_init_irq_moder(struct virtnet_info *vi)
6216{
6217 u8 profile_flags = 0, coal_flags = 0;
6218 int ret, i;
6219
6220 profile_flags |= DIM_PROFILE_RX;
6221 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS;
	ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags,
				     DIM_CQ_PERIOD_MODE_START_FROM_EQE,
				     0, virtnet_rx_dim_work, NULL);

	if (ret)
		return ret;

	for (i = 0; i < vi->max_queue_pairs; i++)
		net_dim_setting(vi->dev, &vi->rq[i].dim, false);
6231
6232 return 0;
6233}
6234
6235static void virtnet_free_irq_moder(struct virtnet_info *vi)
6236{
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
		return;

	rtnl_lock();
	net_dim_free_irq_moder(vi->dev);
6242 rtnl_unlock();
6243}
6244
6245static const struct net_device_ops virtnet_netdev = {
6246 .ndo_open = virtnet_open,
6247 .ndo_stop = virtnet_close,
6248 .ndo_start_xmit = start_xmit,
6249 .ndo_validate_addr = eth_validate_addr,
6250 .ndo_set_mac_address = virtnet_set_mac_address,
6251 .ndo_set_rx_mode = virtnet_set_rx_mode,
6252 .ndo_get_stats64 = virtnet_stats,
6253 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
6254 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
6255 .ndo_bpf = virtnet_xdp,
6256 .ndo_xdp_xmit = virtnet_xdp_xmit,
6257 .ndo_xsk_wakeup = virtnet_xsk_wakeup,
6258 .ndo_features_check = passthru_features_check,
6259 .ndo_get_phys_port_name = virtnet_get_phys_port_name,
6260 .ndo_set_features = virtnet_set_features,
6261 .ndo_tx_timeout = virtnet_tx_timeout,
6262};
6263
6264static void virtnet_config_changed_work(struct work_struct *work)
6265{
6266 struct virtnet_info *vi =
6267 container_of(work, struct virtnet_info, config_work);
6268 u16 v;
6269
6270 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
6271 struct virtio_net_config, status, &v) < 0)
6272 return;
6273
6274 if (v & VIRTIO_NET_S_ANNOUNCE) {
		netdev_notify_peers(vi->dev);
6276 virtnet_ack_link_announce(vi);
6277 }
6278
6279 /* Ignore unknown (future) status bits */
6280 v &= VIRTIO_NET_S_LINK_UP;
6281
6282 if (vi->status == v)
6283 return;
6284
6285 vi->status = v;
6286
	if (vi->status & VIRTIO_NET_S_LINK_UP) {
		virtnet_update_settings(vi);
		netif_carrier_on(vi->dev);
		netif_tx_wake_all_queues(vi->dev);
	} else {
		netif_carrier_off(vi->dev);
		netif_tx_stop_all_queues(vi->dev);
6294 }
6295}
6296
6297static void virtnet_config_changed(struct virtio_device *vdev)
6298{
6299 struct virtnet_info *vi = vdev->priv;
6300
	schedule_work(&vi->config_work);
6302}
6303
6304static void virtnet_free_queues(struct virtnet_info *vi)
6305{
6306 int i;
6307
	for (i = 0; i < vi->max_queue_pairs; i++) {
		__netif_napi_del(&vi->rq[i].napi);
		__netif_napi_del(&vi->sq[i].napi);
	}

	/* Since we called __netif_napi_del(), we need to respect an RCU
	 * grace period before freeing vi->rq.
	 */
	synchronize_net();

	kfree(vi->rq);
	kfree(vi->sq);
	kfree(vi->ctrl);
6321}
6322
6323static void _free_receive_bufs(struct virtnet_info *vi)
6324{
6325 struct bpf_prog *old_prog;
6326 int i;
6327
6328 for (i = 0; i < vi->max_queue_pairs; i++) {
6329 while (vi->rq[i].pages)
			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);

		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
		if (old_prog)
			bpf_prog_put(old_prog);
6336 }
6337}
6338
6339static void free_receive_bufs(struct virtnet_info *vi)
6340{
6341 rtnl_lock();
6342 _free_receive_bufs(vi);
6343 rtnl_unlock();
6344}
6345
6346static void free_receive_page_frags(struct virtnet_info *vi)
6347{
6348 int i;
6349 for (i = 0; i < vi->max_queue_pairs; i++)
6350 if (vi->rq[i].alloc_frag.page) {
6351 if (vi->rq[i].last_dma)
				virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
			put_page(vi->rq[i].alloc_frag.page);
6354 }
6355}
6356
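/* Release one unused TX buffer according to how it was packed: an skb, an
 * XDP frame or an XSK descriptor.
 */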
6357static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
6358{
6359 struct virtnet_info *vi = vq->vdev->priv;
6360 struct send_queue *sq;
6361 int i = vq2txq(vq);
6362
6363 sq = &vi->sq[i];
6364
	switch (virtnet_xmit_ptr_unpack(&buf)) {
	case VIRTNET_XMIT_TYPE_SKB:
	case VIRTNET_XMIT_TYPE_SKB_ORPHAN:
		dev_kfree_skb(buf);
		break;

	case VIRTNET_XMIT_TYPE_XDP:
		xdp_return_frame(buf);
		break;

	case VIRTNET_XMIT_TYPE_XSK:
		xsk_tx_completed(sq->xsk_pool, 1);
6377 break;
6378 }
6379}
6380
6381static void virtnet_sq_free_unused_buf_done(struct virtqueue *vq)
6382{
6383 struct virtnet_info *vi = vq->vdev->priv;
6384 int i = vq2txq(vq);
6385
	netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i));
6387}
6388
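/* Drop every buffer still sitting in the TX and RX virtqueues, e.g. before
 * the device is reset or removed.
 */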
6389static void free_unused_bufs(struct virtnet_info *vi)
6390{
6391 void *buf;
6392 int i;
6393
6394 for (i = 0; i < vi->max_queue_pairs; i++) {
6395 struct virtqueue *vq = vi->sq[i].vq;
6396 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
6397 virtnet_sq_free_unused_buf(vq, buf);
6398 cond_resched();
6399 }
6400
6401 for (i = 0; i < vi->max_queue_pairs; i++) {
6402 struct virtqueue *vq = vi->rq[i].vq;
6403
6404 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
6405 virtnet_rq_unmap_free_buf(vq, buf);
6406 cond_resched();
6407 }
6408}
6409
6410static void virtnet_del_vqs(struct virtnet_info *vi)
6411{
6412 struct virtio_device *vdev = vi->vdev;
6413
6414 virtnet_clean_affinity(vi);
6415
6416 vdev->config->del_vqs(vdev);
6417
6418 virtnet_free_queues(vi);
6419}
6420
6421/* How large should a single buffer be so a queue full of these can fit at
6422 * least one full packet?
6423 * Logic below assumes the mergeable buffer header is used.
6424 */
6425static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
6426{
6427 const unsigned int hdr_len = vi->hdr_len;
6428 unsigned int rq_size = virtqueue_get_vring_size(vq);
6429 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
6430 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
6431 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
6432
6433 return max(max(min_buf_len, hdr_len) - hdr_len,
6434 (unsigned int)GOOD_PACKET_LEN);
6435}
6436
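/* Discover and wire up all virtqueues: one RX/TX pair per queue pair plus an
 * optional control vq at the end, in the layout described below.
 */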
6437static int virtnet_find_vqs(struct virtnet_info *vi)
6438{
6439 struct virtqueue_info *vqs_info;
6440 struct virtqueue **vqs;
6441 int ret = -ENOMEM;
6442 int total_vqs;
6443 bool *ctx;
6444 u16 i;
6445
6446 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
6447 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
6448 * possible control vq.
6449 */
6450 total_vqs = vi->max_queue_pairs * 2 +
		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
6452
6453 /* Allocate space for find_vqs parameters */
6454 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
6455 if (!vqs)
6456 goto err_vq;
6457 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL);
6458 if (!vqs_info)
6459 goto err_vqs_info;
6460 if (!vi->big_packets || vi->mergeable_rx_bufs) {
6461 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL);
6462 if (!ctx)
6463 goto err_ctx;
6464 } else {
6465 ctx = NULL;
6466 }
6467
6468 /* Parameters for control virtqueue, if any */
6469 if (vi->has_cvq) {
6470 vqs_info[total_vqs - 1].name = "control";
6471 }
6472
6473 /* Allocate/initialize parameters for send/receive virtqueues */
6474 for (i = 0; i < vi->max_queue_pairs; i++) {
		vqs_info[rxq2vq(i)].callback = skb_recv_done;
		vqs_info[txq2vq(i)].callback = skb_xmit_done;
		sprintf(vi->rq[i].name, "input.%u", i);
		sprintf(vi->sq[i].name, "output.%u", i);
		vqs_info[rxq2vq(i)].name = vi->rq[i].name;
		vqs_info[txq2vq(i)].name = vi->sq[i].name;
		if (ctx)
			vqs_info[rxq2vq(i)].ctx = true;
	}

	ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL);
6486 if (ret)
6487 goto err_find;
6488
6489 if (vi->has_cvq) {
6490 vi->cvq = vqs[total_vqs - 1];
		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
6492 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
6493 }
6494
6495 for (i = 0; i < vi->max_queue_pairs; i++) {
		vi->rq[i].vq = vqs[rxq2vq(i)];
		vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
		vi->sq[i].vq = vqs[txq2vq(i)];
6499 }
6500
	/* Success: fall through with ret == 0 and free the temporary
	 * find_vqs() allocations below.
	 */
6502
6503
6504err_find:
	kfree(ctx);
err_ctx:
	kfree(vqs_info);
err_vqs_info:
	kfree(vqs);
6510err_vq:
6511 return ret;
6512}
6513
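/* Allocate the per-queue-pair software state (rq/sq arrays, NAPI contexts,
 * stats and the optional control buffer) before the virtqueues are found.
 */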
6514static int virtnet_alloc_queues(struct virtnet_info *vi)
6515{
6516 int i;
6517
6518 if (vi->has_cvq) {
6519 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
6520 if (!vi->ctrl)
6521 goto err_ctrl;
6522 } else {
6523 vi->ctrl = NULL;
6524 }
6525 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
6526 if (!vi->sq)
6527 goto err_sq;
6528 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
6529 if (!vi->rq)
6530 goto err_rq;
6531
6532 INIT_DELAYED_WORK(&vi->refill, refill_work);
6533 for (i = 0; i < vi->max_queue_pairs; i++) {
6534 vi->rq[i].pages = NULL;
		netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll,
				      i);
		vi->rq[i].napi.weight = napi_weight;
		netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi,
					 virtnet_poll_tx,
					 napi_tx ? napi_weight : 0);

		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
		ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));

		u64_stats_init(&vi->rq[i].stats.syncp);
		u64_stats_init(&vi->sq[i].stats.syncp);
		mutex_init(&vi->rq[i].dim_lock);
6549 }
6550
6551 return 0;
6552
6553err_rq:
	kfree(vi->sq);
err_sq:
	kfree(vi->ctrl);
6557err_ctrl:
6558 return -ENOMEM;
6559}
6560
6561static int init_vqs(struct virtnet_info *vi)
6562{
6563 int ret;
6564
6565 /* Allocate send & receive queues */
6566 ret = virtnet_alloc_queues(vi);
6567 if (ret)
6568 goto err;
6569
6570 ret = virtnet_find_vqs(vi);
6571 if (ret)
6572 goto err_free;
6573
6574 cpus_read_lock();
6575 virtnet_set_affinity(vi);
6576 cpus_read_unlock();
6577
6578 return 0;
6579
6580err_free:
6581 virtnet_free_queues(vi);
6582err:
6583 return ret;
6584}
6585
6586#ifdef CONFIG_SYSFS
6587static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
6588 char *buf)
6589{
	struct virtnet_info *vi = netdev_priv(queue->dev);
6591 unsigned int queue_index = get_netdev_rx_queue_index(queue);
6592 unsigned int headroom = virtnet_get_headroom(vi);
6593 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
6594 struct ewma_pkt_len *avg;
6595
6596 BUG_ON(queue_index >= vi->max_queue_pairs);
6597 avg = &vi->rq[queue_index].mrg_avg_pkt_len;
	return sprintf(buf, "%u\n",
		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
				       SKB_DATA_ALIGN(headroom + tailroom)));
6601}
6602
6603static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
6604 __ATTR_RO(mergeable_rx_buffer_size);
6605
6606static struct attribute *virtio_net_mrg_rx_attrs[] = {
6607 &mergeable_rx_buffer_size_attribute.attr,
6608 NULL
6609};
6610
6611static const struct attribute_group virtio_net_mrg_rx_group = {
6612 .name = "virtio_net",
6613 .attrs = virtio_net_mrg_rx_attrs
6614};
6615#endif
6616
6617static bool virtnet_fail_on_feature(struct virtio_device *vdev,
6618 unsigned int fbit,
6619 const char *fname, const char *dname)
6620{
6621 if (!virtio_has_feature(vdev, fbit))
6622 return false;
6623
6624 dev_err(&vdev->dev, "device advertises feature %s but not %s",
6625 fname, dname);
6626
6627 return true;
6628}
6629
6630#define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
6631 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
6632
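/* Reject feature combinations that depend on the control virtqueue when
 * VIRTIO_NET_F_CTRL_VQ itself was not offered.
 */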
6633static bool virtnet_validate_features(struct virtio_device *vdev)
6634{
6635 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
6636 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
6637 "VIRTIO_NET_F_CTRL_VQ") ||
6638 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
6639 "VIRTIO_NET_F_CTRL_VQ") ||
6640 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
6641 "VIRTIO_NET_F_CTRL_VQ") ||
6642 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
6643 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
6644 "VIRTIO_NET_F_CTRL_VQ") ||
6645 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
6646 "VIRTIO_NET_F_CTRL_VQ") ||
6647 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
6648 "VIRTIO_NET_F_CTRL_VQ") ||
6649 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
6650 "VIRTIO_NET_F_CTRL_VQ") ||
6651 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL,
6652 "VIRTIO_NET_F_CTRL_VQ"))) {
6653 return false;
6654 }
6655
6656 return true;
6657}
6658
6659#define MIN_MTU ETH_MIN_MTU
6660#define MAX_MTU ETH_MAX_MTU
6661
6662static int virtnet_validate(struct virtio_device *vdev)
6663{
6664 if (!vdev->config->get) {
6665 dev_err(&vdev->dev, "%s failure: config access disabled\n",
6666 __func__);
6667 return -EINVAL;
6668 }
6669
6670 if (!virtnet_validate_features(vdev))
6671 return -EINVAL;
6672
6673 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
6674 int mtu = virtio_cread16(vdev,
6675 offsetof(struct virtio_net_config,
6676 mtu));
6677 if (mtu < MIN_MTU)
6678 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
6679 }
6680
6681 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) &&
6682 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
6683 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby");
6684 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY);
6685 }
6686
6687 return 0;
6688}
6689
6690static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
6691{
	return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
		(virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
		 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
6698}
6699
6700static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
6701{
6702 bool guest_gso = virtnet_check_guest_gso(vi);
6703
	/* If the device can receive ANY guest GSO packets, regardless of MTU,
	 * allocate buffers for packets of maximum size; otherwise limit the
	 * allocation to what an MTU-sized packet needs.
	 */
6708 if (mtu > ETH_DATA_LEN || guest_gso) {
6709 vi->big_packets = true;
6710 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE);
6711 }
6712}
6713
6714#define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10
6715static enum xdp_rss_hash_type
6716virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = {
6717 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE,
6718 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4,
6719 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP,
6720 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP,
6721 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6,
6722 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP,
6723 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP,
6724 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
6725 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
6726 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX
6727};
6728
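/* XDP metadata hook: translate the device-reported hash type and value for
 * bpf_xdp_metadata_rx_hash().
 */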
6729static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
6730 enum xdp_rss_hash_type *rss_type)
6731{
6732 const struct xdp_buff *xdp = (void *)_ctx;
6733 struct virtio_net_hdr_v1_hash *hdr_hash;
6734 struct virtnet_info *vi;
6735 u16 hash_report;
6736
6737 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
6738 return -ENODATA;
6739
	vi = netdev_priv(xdp->rxq->dev);
6741 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
6742 hash_report = __le16_to_cpu(hdr_hash->hash_report);
6743
6744 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE)
6745 hash_report = VIRTIO_NET_HASH_REPORT_NONE;
6746
6747 *rss_type = virtnet_xdp_rss_type[hash_report];
6748 *hash = __le32_to_cpu(hdr_hash->hash_value);
6749 return 0;
6750}
6751
6752static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
6753 .xmo_rx_hash = virtnet_xdp_rx_hash,
6754};
6755
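/* Device probe: read the negotiated features and config space, build the
 * net_device, set up the virtqueues and register the interface.
 */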
6756static int virtnet_probe(struct virtio_device *vdev)
6757{
6758 int i, err = -ENOMEM;
6759 struct net_device *dev;
6760 struct virtnet_info *vi;
6761 u16 max_queue_pairs;
6762 int mtu = 0;
6763
6764 /* Find if host supports multiqueue/rss virtio_net device */
6765 max_queue_pairs = 1;
6766 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
6767 max_queue_pairs =
6768 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));
6769
	/* We need at least 2 queues */
6771 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
6772 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
6773 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
6774 max_queue_pairs = 1;
6775
6776 /* Allocate ourselves a network device with room for our info */
6777 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
6778 if (!dev)
6779 return -ENOMEM;
6780
6781 /* Set up network device as normal. */
6782 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
6783 IFF_TX_SKB_NO_LINEAR;
6784 dev->netdev_ops = &virtnet_netdev;
6785 dev->stat_ops = &virtnet_stat_ops;
6786 dev->features = NETIF_F_HIGHDMA;
6787
6788 dev->ethtool_ops = &virtnet_ethtool_ops;
6789 SET_NETDEV_DEV(dev, &vdev->dev);
6790
6791 /* Do we support "hardware" checksums? */
6792 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
6793 /* This opens up the world of extra features. */
6794 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
6795 if (csum)
6796 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
6797
6798 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
6799 dev->hw_features |= NETIF_F_TSO
6800 | NETIF_F_TSO_ECN | NETIF_F_TSO6;
6801 }
6802 /* Individual feature bits: what can host handle? */
6803 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
6804 dev->hw_features |= NETIF_F_TSO;
6805 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
6806 dev->hw_features |= NETIF_F_TSO6;
6807 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
6808 dev->hw_features |= NETIF_F_TSO_ECN;
6809 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO))
6810 dev->hw_features |= NETIF_F_GSO_UDP_L4;
6811
6812 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) {
6813 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
6814 dev->hw_enc_features = dev->hw_features;
6815 }
6816 if (dev->hw_features & NETIF_F_GSO_UDP_TUNNEL &&
6817 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM)) {
6818 dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
6819 dev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
6820 }
6821
6822 dev->features |= NETIF_F_GSO_ROBUST;
6823
6824 if (gso)
6825 dev->features |= dev->hw_features;
6826 /* (!csum && gso) case will be fixed by register_netdev() */
6827 }
6828
6829 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't
6830 * need to calculate checksums for partially checksummed packets,
6831 * as they're considered valid by the upper layer.
6832 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only
6833 * receives fully checksummed packets. The device may assist in
6834 * validating these packets' checksums, so the driver won't have to.
6835 */
6836 dev->features |= NETIF_F_RXCSUM;
6837
6838 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
6839 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
6840 dev->features |= NETIF_F_GRO_HW;
6841 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
6842 dev->hw_features |= NETIF_F_GRO_HW;
6843
6844 dev->vlan_features = dev->features;
6845 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
6846 NETDEV_XDP_ACT_XSK_ZEROCOPY;
6847
6848 /* MTU range: 68 - 65535 */
6849 dev->min_mtu = MIN_MTU;
6850 dev->max_mtu = MAX_MTU;
6851
6852 /* Configuration may specify what MAC to use. Otherwise random. */
6853 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
6854 u8 addr[ETH_ALEN];
6855
6856 virtio_cread_bytes(vdev,
6857 offsetof(struct virtio_net_config, mac),
				   addr, ETH_ALEN);
6859 eth_hw_addr_set(dev, addr);
6860 } else {
6861 eth_hw_addr_random(dev);
6862 dev_info(&vdev->dev, "Assigned random MAC address %pM\n",
6863 dev->dev_addr);
6864 }
6865
6866 /* Set up our device-specific information */
6867 vi = netdev_priv(dev);
6868 vi->dev = dev;
6869 vi->vdev = vdev;
6870 vdev->priv = vi;
6871
6872 INIT_WORK(&vi->config_work, virtnet_config_changed_work);
6873 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
6874 spin_lock_init(&vi->refill_lock);
6875
6876 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
6877 vi->mergeable_rx_bufs = true;
6878 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG;
6879 }
6880
6881 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
6882 vi->has_rss_hash_report = true;
6883
6884 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
6885 vi->has_rss = true;
6886
6887 vi->rss_indir_table_size =
6888 virtio_cread16(vdev, offsetof(struct virtio_net_config,
6889 rss_max_indirection_table_length));
6890 }
	vi->rss_hdr = devm_kzalloc(&vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL);
6892 if (!vi->rss_hdr) {
6893 err = -ENOMEM;
6894 goto free;
6895 }
6896
6897 if (vi->has_rss || vi->has_rss_hash_report) {
6898 vi->rss_key_size =
6899 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
6900 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
6901 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n",
6902 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE);
6903 err = -EINVAL;
6904 goto free;
6905 }
6906
6907 vi->rss_hash_types_supported =
6908 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types));
6909 vi->rss_hash_types_supported &=
6910 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX |
6911 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
6912 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
6913
6914 dev->hw_features |= NETIF_F_RXHASH;
6915 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
6916 }
6917
6918 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) ||
6919 virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO))
6920 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel);
6921 else if (vi->has_rss_hash_report)
6922 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
6923 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
6924 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
6925 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
6926 else
6927 vi->hdr_len = sizeof(struct virtio_net_hdr);
6928
6929 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM))
6930 vi->rx_tnl_csum = true;
6931 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO))
6932 vi->rx_tnl = true;
6933 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO))
6934 vi->tx_tnl = true;
6935
6936 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
6937 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
6938 vi->any_header_sg = true;
6939
6940 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
6941 vi->has_cvq = true;
6942
6943 mutex_init(&vi->cvq_lock);
6944
6945 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
6946 mtu = virtio_cread16(vdev,
6947 offsetof(struct virtio_net_config,
6948 mtu));
6949 if (mtu < dev->min_mtu) {
6950 /* Should never trigger: MTU was previously validated
6951 * in virtnet_validate.
6952 */
6953 dev_err(&vdev->dev,
6954 "device MTU appears to have changed it is now %d < %d",
6955 mtu, dev->min_mtu);
6956 err = -EINVAL;
6957 goto free;
6958 }
6959
6960 dev->mtu = mtu;
6961 dev->max_mtu = mtu;
6962 }
6963
6964 virtnet_set_big_packets(vi, mtu);
6965
6966 if (vi->any_header_sg)
6967 dev->needed_headroom = vi->hdr_len;
6968
6969 /* Enable multiqueue by default */
6970 if (num_online_cpus() >= max_queue_pairs)
6971 vi->curr_queue_pairs = max_queue_pairs;
6972 else
6973 vi->curr_queue_pairs = num_online_cpus();
6974 vi->max_queue_pairs = max_queue_pairs;
6975
6976 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
6977 err = init_vqs(vi);
6978 if (err)
6979 goto free;
6980
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
6982 vi->intr_coal_rx.max_usecs = 0;
6983 vi->intr_coal_tx.max_usecs = 0;
6984 vi->intr_coal_rx.max_packets = 0;
6985
6986 /* Keep the default values of the coalescing parameters
6987 * aligned with the default napi_tx state.
6988 */
6989 if (vi->sq[0].napi.weight)
6990 vi->intr_coal_tx.max_packets = 1;
6991 else
6992 vi->intr_coal_tx.max_packets = 0;
6993 }
6994
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
6996 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */
6997 for (i = 0; i < vi->max_queue_pairs; i++)
6998 if (vi->sq[i].napi.weight)
6999 vi->sq[i].intr_coal.max_packets = 1;
7000
7001 err = virtnet_init_irq_moder(vi);
7002 if (err)
7003 goto free;
7004 }
7005
7006#ifdef CONFIG_SYSFS
7007 if (vi->mergeable_rx_bufs)
7008 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
7009#endif
	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
7012
7013 virtnet_init_settings(dev);
7014
7015 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
		vi->failover = net_failover_create(vi->dev);
		if (IS_ERR(vi->failover)) {
			err = PTR_ERR(vi->failover);
7019 goto free_vqs;
7020 }
7021 }
7022
7023 if (vi->has_rss || vi->has_rss_hash_report)
7024 virtnet_init_default_rss(vi);
7025
7026 enable_rx_mode_work(vi);
7027
7028 /* serialize netdev register + virtio_device_ready() with ndo_open() */
7029 rtnl_lock();
7030
7031 err = register_netdevice(dev);
7032 if (err) {
7033 pr_debug("virtio_net: registering device failed\n");
7034 rtnl_unlock();
7035 goto free_failover;
7036 }
7037
7038 /* Disable config change notification until ndo_open. */
	virtio_config_driver_disable(vi->vdev);

	virtio_device_ready(vdev);
7042
7043 if (vi->has_rss || vi->has_rss_hash_report) {
7044 if (!virtnet_commit_rss_command(vi)) {
7045 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n");
7046 dev->hw_features &= ~NETIF_F_RXHASH;
7047 vi->has_rss_hash_report = false;
7048 vi->has_rss = false;
7049 }
7050 }
7051
	virtnet_set_queues(vi, vi->curr_queue_pairs);
7053
	/* A random MAC address has been assigned, notify the device.
	 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
	 * because many devices work fine without getting the MAC explicitly.
	 */
	if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		struct scatterlist sg;

		sg_init_one(&sg, dev->dev_addr, dev->addr_len);
		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
7065 pr_debug("virtio_net: setting MAC address failed\n");
7066 rtnl_unlock();
7067 err = -EINVAL;
7068 goto free_unregister_netdev;
7069 }
7070 }
7071
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) {
7073 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL;
7074 struct scatterlist sg;
7075 __le64 v;
7076
7077 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL);
7078 if (!stats_cap) {
7079 rtnl_unlock();
7080 err = -ENOMEM;
7081 goto free_unregister_netdev;
7082 }
7083
7084 sg_init_one(&sg, stats_cap, sizeof(*stats_cap));
7085
		if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
						VIRTIO_NET_CTRL_STATS_QUERY,
						NULL, &sg)) {
			pr_debug("virtio_net: failed to get stats capability\n");
7090 rtnl_unlock();
7091 err = -EINVAL;
7092 goto free_unregister_netdev;
7093 }
7094
7095 v = stats_cap->supported_stats_types[0];
7096 vi->device_stats_cap = le64_to_cpu(v);
7097 }
7098
	/* Assume link up if device can't report link status,
	 * otherwise get link status from config.
	 */
	netif_carrier_off(dev);
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
		virtio_config_changed(vi->vdev);
7104 } else {
7105 vi->status = VIRTIO_NET_S_LINK_UP;
7106 virtnet_update_settings(vi);
7107 netif_carrier_on(dev);
7108 }
7109
7110 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) {
7111 unsigned int fbit;
7112
		fbit = virtio_offload_to_feature(guest_offloads[i]);
		if (virtio_has_feature(vi->vdev, fbit))
			set_bit(guest_offloads[i], &vi->guest_offloads);
7116 }
7117 vi->guest_offloads_capable = vi->guest_offloads;
7118
7119 rtnl_unlock();
7120
7121 err = virtnet_cpu_notif_add(vi);
7122 if (err) {
7123 pr_debug("virtio_net: registering cpu notifier failed\n");
7124 goto free_unregister_netdev;
7125 }
7126
7127 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
7128 dev->name, max_queue_pairs);
7129
7130 return 0;
7131
7132free_unregister_netdev:
7133 unregister_netdev(dev);
7134free_failover:
	net_failover_destroy(vi->failover);
free_vqs:
	virtio_reset_device(vdev);
	cancel_delayed_work_sync(&vi->refill);
	free_receive_page_frags(vi);
	virtnet_del_vqs(vi);
free:
	free_netdev(dev);
7143 return err;
7144}
7145
7146static void remove_vq_common(struct virtnet_info *vi)
7147{
7148 int i;
7149
	virtio_reset_device(vi->vdev);

	/* Free unused buffers in both send and recv, if any. */
	free_unused_bufs(vi);

	/*
	 * Rule of thumb: netdev_tx_reset_queue() should follow any skb
	 * freeing that is not followed by netdev_tx_completed_queue().
	 */
	for (i = 0; i < vi->max_queue_pairs; i++)
		netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, i));
7161
7162 free_receive_bufs(vi);
7163
7164 free_receive_page_frags(vi);
7165
7166 virtnet_del_vqs(vi);
7167}
7168
7169static void virtnet_remove(struct virtio_device *vdev)
7170{
7171 struct virtnet_info *vi = vdev->priv;
7172
7173 virtnet_cpu_notif_remove(vi);
7174
7175 /* Make sure no work handler is accessing the device. */
	flush_work(&vi->config_work);
	disable_rx_mode_work(vi);
	flush_work(&vi->rx_mode_work);

	virtnet_free_irq_moder(vi);

	unregister_netdev(vi->dev);

	net_failover_destroy(vi->failover);

	remove_vq_common(vi);

	free_netdev(vi->dev);
7189}
7190
7191static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
7192{
7193 struct virtnet_info *vi = vdev->priv;
7194
7195 virtnet_cpu_notif_remove(vi);
7196 virtnet_freeze_down(vdev);
7197 remove_vq_common(vi);
7198
7199 return 0;
7200}
7201
7202static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
7203{
7204 struct virtnet_info *vi = vdev->priv;
7205 int err;
7206
7207 err = virtnet_restore_up(vdev);
7208 if (err)
7209 return err;
	virtnet_set_queues(vi, vi->curr_queue_pairs);
7211
7212 err = virtnet_cpu_notif_add(vi);
7213 if (err) {
7214 virtnet_freeze_down(vdev);
7215 remove_vq_common(vi);
7216 return err;
7217 }
7218
7219 return 0;
7220}
7221
7222static struct virtio_device_id id_table[] = {
7223 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
7224 { 0 },
7225};
7226
7227#define VIRTNET_FEATURES \
7228 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
7229 VIRTIO_NET_F_MAC, \
7230 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
7231 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
7232 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
7233 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
7234 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
7235 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
7236 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
7237 VIRTIO_NET_F_CTRL_MAC_ADDR, \
7238 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
7239 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
7240 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
7241 VIRTIO_NET_F_VQ_NOTF_COAL, \
7242 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS
7243
7244static unsigned int features[] = {
7245 VIRTNET_FEATURES,
7246 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO,
7247 VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM,
7248 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO,
7249 VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM,
7250};
7251
7252static unsigned int features_legacy[] = {
7253 VIRTNET_FEATURES,
7254 VIRTIO_NET_F_GSO,
7255 VIRTIO_F_ANY_LAYOUT,
7256};
7257
7258static struct virtio_driver virtio_net_driver = {
7259 .feature_table = features,
7260 .feature_table_size = ARRAY_SIZE(features),
7261 .feature_table_legacy = features_legacy,
7262 .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
7263 .driver.name = KBUILD_MODNAME,
7264 .id_table = id_table,
7265 .validate = virtnet_validate,
7266 .probe = virtnet_probe,
7267 .remove = virtnet_remove,
7268 .config_changed = virtnet_config_changed,
7269#ifdef CONFIG_PM_SLEEP
7270 .freeze = virtnet_freeze,
7271 .restore = virtnet_restore,
7272#endif
7273};
7274
7275static __init int virtio_net_driver_init(void)
7276{
7277 int ret;
7278
	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
				      virtnet_cpu_online,
				      virtnet_cpu_down_prep);
	if (ret < 0)
		goto out;
	virtionet_online = ret;
	ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
				      NULL, virtnet_cpu_dead);
	if (ret)
		goto err_dead;
	ret = register_virtio_driver(&virtio_net_driver);
	if (ret)
		goto err_virtio;
	return 0;
err_virtio:
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
err_dead:
	cpuhp_remove_multi_state(virtionet_online);
7297out:
7298 return ret;
7299}
7300module_init(virtio_net_driver_init);
7301
7302static __exit void virtio_net_driver_exit(void)
7303{
	unregister_virtio_driver(&virtio_net_driver);
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
	cpuhp_remove_multi_state(virtionet_online);
7307}
7308module_exit(virtio_net_driver_exit);
7309
7310MODULE_DEVICE_TABLE(virtio, id_table);
7311MODULE_DESCRIPTION("Virtio network driver");
7312MODULE_LICENSE("GPL");
7313