net-sysfs.c source code [Linux/net/core/net-sysfs.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* net-sysfs.c - network device class and attributes
4	*
5	* Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
6	*/
7
8	#include <linux/capability.h>
9	#include <linux/kernel.h>
10	#include <linux/netdevice.h>
11	#include <linux/if_arp.h>
12	#include <linux/slab.h>
13	#include <linux/sched/signal.h>
14	#include <linux/sched/isolation.h>
15	#include <linux/nsproxy.h>
16	#include <net/sock.h>
17	#include <net/net_namespace.h>
18	#include <linux/rtnetlink.h>
19	#include <linux/vmalloc.h>
20	#include <linux/export.h>
21	#include <linux/jiffies.h>
22	#include <linux/pm_runtime.h>
23	#include <linux/of.h>
24	#include <linux/of_net.h>
25	#include <linux/cpu.h>
26	#include <net/netdev_lock.h>
27	#include <net/netdev_rx_queue.h>
28	#include <net/rps.h>
29
30	#include "dev.h"
31	#include "net-sysfs.h"
32
33	#ifdef CONFIG_SYSFS
34	static const char fmt_hex[] = "%#x\n";
35	static const char fmt_dec[] = "%d\n";
36	static const char fmt_uint[] = "%u\n";
37	static const char fmt_ulong[] = "%lu\n";
38	static const char fmt_u64[] = "%llu\n";
39
40	/ Caller holds RTNL, netdev->lock or RCU /
41	static inline int dev_isalive(const struct net_device *dev)
42	{
43	return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
44	}
45
46	/ There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active,*
47	* when unregistering a net device and accessing associated sysfs files. The
48	* potential deadlock is as follow:
49	*
50	* CPU 0 CPU 1
51	*
52	* rtnl_lock vfs_read
53	* unregister_netdevice_many kernfs_seq_start
54	* device_del / kobject_put kernfs_get_active (kn->active++)
55	* kernfs_drain sysfs_kf_seq_show
56	* wait_event( rtnl_lock
57	* kn->active == KN_DEACTIVATED_BIAS) -> waits on CPU 0 to release
58	* -> waits on CPU 1 to decrease kn->active the rtnl lock.
59	*
60	* The historical fix was to use rtnl_trylock with restart_syscall to bail out
61	* of sysfs operations when the lock couldn't be taken. This fixed the above
62	* issue as it allowed CPU 1 to bail out of the ABBA situation.
63	*
64	* But it came with performances issues, as syscalls are being restarted in
65	* loops when there was contention on the rtnl lock, with huge slow downs in
66	* specific scenarios (e.g. lots of virtual interfaces created and userspace
67	* daemons querying their attributes).
68	*
69	* The idea below is to bail out of the active kernfs_node protection
70	* (kn->active) while trying to take the rtnl lock.
71	*
72	* This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The
73	* net device is guaranteed to be alive if this returns successfully.
74	*/
75	static int sysfs_rtnl_lock(struct kobject kobj, struct* attribute *attr,
76	struct net_device *ndev)
77	{
78	struct kernfs_node *kn;
79	int ret = `0`;
80
81	/ First, we hold a reference to the net device as the unregistration*
82	* path might run in parallel. This will ensure the net device and the
83	* associated sysfs objects won't be freed while we try to take the rtnl
84	* lock.
85	*/
86	dev_hold(dev: ndev);
87	/ sysfs_break_active_protection was introduced to allow self-removal of*
88	* devices and their associated sysfs files by bailing out of the
89	* sysfs/kernfs protection. We do this here to allow the unregistration
90	* path to complete in parallel. The following takes a reference on the
91	* kobject and the kernfs_node being accessed.
92	*
93	* This works because we hold a reference onto the net device and the
94	* unregistration path will wait for us eventually in netdev_run_todo
95	* (outside an rtnl lock section).
96	*/
97	kn = sysfs_break_active_protection(kobj, attr);
98	/ We can now try to take the rtnl lock. This can't deadlock us as the*
99	* unregistration path is able to drain sysfs files (kernfs_node) thanks
100	* to the above dance.
101	*/
102	if (rtnl_lock_interruptible()) {
103	ret = -ERESTARTSYS;
104	goto unbreak;
105	}
106	/ Check dismantle on the device hasn't started, otherwise deny the*
107	* operation.
108	*/
109	if (!dev_isalive(dev: ndev)) {
110	rtnl_unlock();
111	ret = -ENODEV;
112	goto unbreak;
113	}
114	/ We are now sure the device dismantle hasn't started nor that it can*
115	* start before we exit the locking section as we hold the rtnl lock.
116	* There's no need to keep unbreaking the sysfs protection nor to hold
117	* a net device reference from that point; that was only needed to take
118	* the rtnl lock.
119	*/
120	unbreak:
121	sysfs_unbreak_active_protection(kn);
122	dev_put(dev: ndev);
123
124	return ret;
125	}
126
127	/ use same locking rules as GIF* ioctl's /
128	static ssize_t netdev_show(const struct device *dev,
129	struct device_attribute attr, char* *buf,
130	ssize_t (format)(const* struct net_device , char* *))
131	{
132	struct net_device *ndev = to_net_dev(dev);
133	ssize_t ret = -EINVAL;
134
135	rcu_read_lock();
136	if (dev_isalive(dev: ndev))
137	ret = (*format)(ndev, buf);
138	rcu_read_unlock();
139
140	return ret;
141	}
142
/* generate a show function for simple field
 *
 * Expands to format_<field>(), which emits dev-><field> with @format_string,
 * and <field>_show(), which calls it through netdev_show().
 */
#define NETDEVICE_SHOW(field, format_string)				\
static ssize_t format_##field(const struct net_device *dev, char *buf)	\
{									\
	return sysfs_emit(buf, format_string, READ_ONCE(dev->field));	\
}									\
static ssize_t field##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	return netdev_show(dev, attr, buf, format_##field);		\
}
154
/* NETDEVICE_SHOW() plus a read-only device attribute for @field */
#define NETDEVICE_SHOW_RO(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RO(field)
158
/* NETDEVICE_SHOW() plus a read-write device attribute for @field; the
 * matching <field>_store() must be defined by the user of the macro.
 */
#define NETDEVICE_SHOW_RW(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RW(field)
162
163	/ use same locking and permission rules as SIF* ioctl's /
164	static ssize_t netdev_store(struct device dev, struct* device_attribute *attr,
165	const char *buf, size_t len,
166	int (set)(struct* net_device , unsigned* long))
167	{
168	struct net_device *netdev = to_net_dev(dev);
169	struct net *net = dev_net(dev: netdev);
170	unsigned long new;
171	int ret;
172
173	if (!ns_capable(ns: net->user_ns, CAP_NET_ADMIN))
174	return -EPERM;
175
176	ret = kstrtoul(s: buf, base: `0`, res: &new);
177	if (ret)
178	goto err;
179
180	ret = sysfs_rtnl_lock(kobj: &dev->kobj, attr: &attr->attr, ndev: netdev);
181	if (ret)
182	goto err;
183
184	ret = (*set)(netdev, new);
185	if (ret == `0`)
186	ret = len;
187
188	rtnl_unlock();
189	err:
190	return ret;
191	}
192
193	/ Same as netdev_store() but takes netdev_lock() instead of rtnl_lock() /
194	static ssize_t
195	netdev_lock_store(struct device dev, struct* device_attribute *attr,
196	const char *buf, size_t len,
197	int (set)(struct* net_device , unsigned* long))
198	{
199	struct net_device *netdev = to_net_dev(dev);
200	struct net *net = dev_net(dev: netdev);
201	unsigned long new;
202	int ret;
203
204	if (!ns_capable(ns: net->user_ns, CAP_NET_ADMIN))
205	return -EPERM;
206
207	ret = kstrtoul(s: buf, base: `0`, res: &new);
208	if (ret)
209	return ret;
210
211	netdev_lock(dev: netdev);
212
213	if (dev_isalive(dev: netdev)) {
214	ret = (*set)(netdev, new);
215	if (ret == `0`)
216	ret = len;
217	}
218	netdev_unlock(dev: netdev);
219
220	return ret;
221	}
222
/* Read-only attributes that simply print one struct net_device field */
NETDEVICE_SHOW_RO(dev_id, fmt_hex);
NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
NETDEVICE_SHOW_RO(addr_len, fmt_dec);
NETDEVICE_SHOW_RO(ifindex, fmt_dec);
NETDEVICE_SHOW_RO(type, fmt_dec);
NETDEVICE_SHOW_RO(link_mode, fmt_dec);
230
231	static ssize_t iflink_show(struct device dev, struct* device_attribute *attr,
232	char *buf)
233	{
234	struct net_device *ndev = to_net_dev(dev);
235
236	return sysfs_emit(buf, fmt: fmt_dec, dev_get_iflink(dev: ndev));
237	}
238	static DEVICE_ATTR_RO(iflink);
239
240	static ssize_t format_name_assign_type(const struct net_device dev, char* *buf)
241	{
242	return sysfs_emit(buf, fmt: fmt_dec, READ_ONCE(dev->name_assign_type));
243	}
244
245	static ssize_t name_assign_type_show(struct device *dev,
246	struct device_attribute *attr,
247	char *buf)
248	{
249	struct net_device *ndev = to_net_dev(dev);
250	ssize_t ret = -EINVAL;
251
252	if (READ_ONCE(ndev->name_assign_type) != NET_NAME_UNKNOWN)
253	ret = netdev_show(dev, attr, buf, format: format_name_assign_type);
254
255	return ret;
256	}
257	static DEVICE_ATTR_RO(name_assign_type);
258
259	/ use same locking rules as GIFHWADDR ioctl's (netif_get_mac_address()) /
260	static ssize_t address_show(struct device dev, struct* device_attribute *attr,
261	char *buf)
262	{
263	struct net_device *ndev = to_net_dev(dev);
264	ssize_t ret = -EINVAL;
265
266	down_read(sem: &dev_addr_sem);
267
268	rcu_read_lock();
269	if (dev_isalive(dev: ndev))
270	ret = sysfs_format_mac(buf, addr: ndev->dev_addr, len: ndev->addr_len);
271	rcu_read_unlock();
272
273	up_read(sem: &dev_addr_sem);
274	return ret;
275	}
276	static DEVICE_ATTR_RO(address);
277
278	static ssize_t broadcast_show(struct device *dev,
279	struct device_attribute attr, char* *buf)
280	{
281	struct net_device *ndev = to_net_dev(dev);
282	int ret = -EINVAL;
283
284	rcu_read_lock();
285	if (dev_isalive(dev: ndev))
286	ret = sysfs_format_mac(buf, addr: ndev->broadcast, len: ndev->addr_len);
287	rcu_read_unlock();
288	return ret;
289	}
290	static DEVICE_ATTR_RO(broadcast);
291
292	static int change_carrier(struct net_device dev, unsigned* long new_carrier)
293	{
294	if (!netif_running(dev))
295	return -EINVAL;
296	return dev_change_carrier(dev, new_carrier: (bool)new_carrier);
297	}
298
299	static ssize_t carrier_store(struct device dev, struct* device_attribute *attr,
300	const char *buf, size_t len)
301	{
302	struct net_device *netdev = to_net_dev(dev);
303
304	/ The check is also done in change_carrier; this helps returning early*
305	* without hitting the locking section in netdev_store.
306	*/
307	if (!netdev->netdev_ops->ndo_change_carrier)
308	return -EOPNOTSUPP;
309
310	return netdev_store(dev, attr, buf, len, set: change_carrier);
311	}
312
313	static ssize_t carrier_show(struct device *dev,
314	struct device_attribute attr, char* *buf)
315	{
316	struct net_device *netdev = to_net_dev(dev);
317	int ret;
318
319	ret = sysfs_rtnl_lock(kobj: &dev->kobj, attr: &attr->attr, ndev: netdev);
320	if (ret)
321	return ret;
322
323	ret = -EINVAL;
324	if (netif_running(dev: netdev)) {
325	/ Synchronize carrier state with link watch,*
326	* see also rtnl_getlink().
327	*/
328	linkwatch_sync_dev(dev: netdev);
329
330	ret = sysfs_emit(buf, fmt: fmt_dec, !!netif_carrier_ok(dev: netdev));
331	}
332
333	rtnl_unlock();
334	return ret;
335	}
336	static DEVICE_ATTR_RW(carrier);
337
338	static ssize_t speed_show(struct device *dev,
339	struct device_attribute attr, char* *buf)
340	{
341	struct net_device *netdev = to_net_dev(dev);
342	int ret = -EINVAL;
343
344	/ The check is also done in __ethtool_get_link_ksettings; this helps*
345	* returning early without hitting the locking section below.
346	*/
347	if (!netdev->ethtool_ops->get_link_ksettings)
348	return ret;
349
350	ret = sysfs_rtnl_lock(kobj: &dev->kobj, attr: &attr->attr, ndev: netdev);
351	if (ret)
352	return ret;
353
354	ret = -EINVAL;
355	if (netif_running(dev: netdev)) {
356	struct ethtool_link_ksettings cmd;
357
358	if (!__ethtool_get_link_ksettings(dev: netdev, link_ksettings: &cmd))
359	ret = sysfs_emit(buf, fmt: fmt_dec, cmd.base.speed);
360	}
361	rtnl_unlock();
362	return ret;
363	}
364	static DEVICE_ATTR_RO(speed);
365
366	static ssize_t duplex_show(struct device *dev,
367	struct device_attribute attr, char* *buf)
368	{
369	struct net_device *netdev = to_net_dev(dev);
370	int ret = -EINVAL;
371
372	/ The check is also done in __ethtool_get_link_ksettings; this helps*
373	* returning early without hitting the locking section below.
374	*/
375	if (!netdev->ethtool_ops->get_link_ksettings)
376	return ret;
377
378	ret = sysfs_rtnl_lock(kobj: &dev->kobj, attr: &attr->attr, ndev: netdev);
379	if (ret)
380	return ret;
381
382	ret = -EINVAL;
383	if (netif_running(dev: netdev)) {
384	struct ethtool_link_ksettings cmd;
385
386	if (!__ethtool_get_link_ksettings(dev: netdev, link_ksettings: &cmd)) {
387	const char *duplex;
388
389	switch (cmd.base.duplex) {
390	case DUPLEX_HALF:
391	duplex = "half";
392	break;
393	case DUPLEX_FULL:
394	duplex = "full";
395	break;
396	default:
397	duplex = "unknown";
398	break;
399	}
400	ret = sysfs_emit(buf, fmt: "%s\n", duplex);
401	}
402	}
403	rtnl_unlock();
404	return ret;
405	}
406	static DEVICE_ATTR_RO(duplex);
407
408	static ssize_t testing_show(struct device *dev,
409	struct device_attribute attr, char* *buf)
410	{
411	struct net_device *netdev = to_net_dev(dev);
412
413	if (netif_running(dev: netdev))
414	return sysfs_emit(buf, fmt: fmt_dec, !!netif_testing(dev: netdev));
415
416	return -EINVAL;
417	}
418	static DEVICE_ATTR_RO(testing);
419
420	static ssize_t dormant_show(struct device *dev,
421	struct device_attribute attr, char* *buf)
422	{
423	struct net_device *netdev = to_net_dev(dev);
424
425	if (netif_running(dev: netdev))
426	return sysfs_emit(buf, fmt: fmt_dec, !!netif_dormant(dev: netdev));
427
428	return -EINVAL;
429	}
430	static DEVICE_ATTR_RO(dormant);
431
/* Names printed by operstate_show(), indexed by the operstate value */
static const char *const operstates[] = {
	"unknown",
	"notpresent", /* currently unused */
	"down",
	"lowerlayerdown",
	"testing",
	"dormant",
	"up"
};
441
442	static ssize_t operstate_show(struct device *dev,
443	struct device_attribute attr, char* *buf)
444	{
445	const struct net_device *netdev = to_net_dev(dev);
446	unsigned char operstate;
447
448	operstate = READ_ONCE(netdev->operstate);
449	if (!netif_running(dev: netdev))
450	operstate = IF_OPER_DOWN;
451
452	if (operstate >= ARRAY_SIZE(operstates))
453	return -EINVAL; / should not happen /
454
455	return sysfs_emit(buf, fmt: "%s\n", operstates[operstate]);
456	}
457	static DEVICE_ATTR_RO(operstate);
458
459	static ssize_t carrier_changes_show(struct device *dev,
460	struct device_attribute *attr,
461	char *buf)
462	{
463	struct net_device *netdev = to_net_dev(dev);
464
465	return sysfs_emit(buf, fmt: fmt_dec,
466	atomic_read(v: &netdev->carrier_up_count) +
467	atomic_read(v: &netdev->carrier_down_count));
468	}
469	static DEVICE_ATTR_RO(carrier_changes);
470
471	static ssize_t carrier_up_count_show(struct device *dev,
472	struct device_attribute *attr,
473	char *buf)
474	{
475	struct net_device *netdev = to_net_dev(dev);
476
477	return sysfs_emit(buf, fmt: fmt_dec, atomic_read(v: &netdev->carrier_up_count));
478	}
479	static DEVICE_ATTR_RO(carrier_up_count);
480
481	static ssize_t carrier_down_count_show(struct device *dev,
482	struct device_attribute *attr,
483	char *buf)
484	{
485	struct net_device *netdev = to_net_dev(dev);
486
487	return sysfs_emit(buf, fmt: fmt_dec, atomic_read(v: &netdev->carrier_down_count));
488	}
489	static DEVICE_ATTR_RO(carrier_down_count);
490
/* read-write attributes */

/* netdev_store() setter: pass the value (truncated to int) to dev_set_mtu() */
static int change_mtu(struct net_device *dev, unsigned long new_mtu)
{
	return dev_set_mtu(dev, (int)new_mtu);
}
497
498	static ssize_t mtu_store(struct device dev, struct* device_attribute *attr,
499	const char *buf, size_t len)
500	{
501	return netdev_store(dev, attr, buf, len, set: change_mtu);
502	}
503	NETDEVICE_SHOW_RW(mtu, fmt_dec);
504
505	static int change_flags(struct net_device dev, unsigned* long new_flags)
506	{
507	return dev_change_flags(dev, flags: (unsigned int)new_flags, NULL);
508	}
509
510	static ssize_t flags_store(struct device dev, struct* device_attribute *attr,
511	const char *buf, size_t len)
512	{
513	return netdev_store(dev, attr, buf, len, set: change_flags);
514	}
515	NETDEVICE_SHOW_RW(flags, fmt_hex);
516
517	static ssize_t tx_queue_len_store(struct device *dev,
518	struct device_attribute *attr,
519	const char *buf, size_t len)
520	{
521	if (!capable(CAP_NET_ADMIN))
522	return -EPERM;
523
524	return netdev_store(dev, attr, buf, len, set: dev_change_tx_queue_len);
525	}
526	NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
527
/* netdev_lock_store() setter: record the new GRO flush timeout; never fails */
static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
	netdev_set_gro_flush_timeout(dev, val);
	return 0;
}
533
534	static ssize_t gro_flush_timeout_store(struct device *dev,
535	struct device_attribute *attr,
536	const char *buf, size_t len)
537	{
538	if (!capable(CAP_NET_ADMIN))
539	return -EPERM;
540
541	return netdev_lock_store(dev, attr, buf, len, set: change_gro_flush_timeout);
542	}
543	NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
544
545	static int change_napi_defer_hard_irqs(struct net_device dev, unsigned* long val)
546	{
547	if (val > S32_MAX)
548	return -ERANGE;
549
550	netdev_set_defer_hard_irqs(netdev: dev, defer: (u32)val);
551	return `0`;
552	}
553
554	static ssize_t napi_defer_hard_irqs_store(struct device *dev,
555	struct device_attribute *attr,
556	const char *buf, size_t len)
557	{
558	if (!capable(CAP_NET_ADMIN))
559	return -EPERM;
560
561	return netdev_lock_store(dev, attr, buf, len,
562	set: change_napi_defer_hard_irqs);
563	}
564	NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint);
565
566	static ssize_t ifalias_store(struct device dev, struct* device_attribute *attr,
567	const char *buf, size_t len)
568	{
569	struct net_device *netdev = to_net_dev(dev);
570	struct net *net = dev_net(dev: netdev);
571	size_t count = len;
572	ssize_t ret;
573
574	if (!ns_capable(ns: net->user_ns, CAP_NET_ADMIN))
575	return -EPERM;
576
577	/ ignore trailing newline /
578	if (len > `0` && buf[len - `1`] == `'\n'`)
579	--count;
580
581	ret = sysfs_rtnl_lock(kobj: &dev->kobj, attr: &attr->attr, ndev: netdev);
582	if (ret)
583	return ret;
584
585	ret = dev_set_alias(netdev, buf, count);
586	if (ret < `0`)
587	goto err;
588	ret = len;
589	netdev_state_change(dev: netdev);
590	err:
591	rtnl_unlock();
592
593	return ret;
594	}
595
596	static ssize_t ifalias_show(struct device *dev,
597	struct device_attribute attr, char* *buf)
598	{
599	const struct net_device *netdev = to_net_dev(dev);
600	char tmp[IFALIASZ];
601	ssize_t ret;
602
603	ret = dev_get_alias(netdev, tmp, sizeof(tmp));
604	if (ret > `0`)
605	ret = sysfs_emit(buf, fmt: "%s\n", tmp);
606	return ret;
607	}
608	static DEVICE_ATTR_RW(ifalias);
609
/* netdev_store() setter: set the device group (value truncated to int);
 * never fails.
 */
static int change_group(struct net_device *dev, unsigned long new_group)
{
	dev_set_group(dev, (int)new_group);
	return 0;
}
615
616	static ssize_t group_store(struct device dev, struct* device_attribute *attr,
617	const char *buf, size_t len)
618	{
619	return netdev_store(dev, attr, buf, len, set: change_group);
620	}
621	NETDEVICE_SHOW(group, fmt_dec);
622	static DEVICE_ATTR(netdev_group, `0644`, group_show, group_store);
623
624	static int change_proto_down(struct net_device dev, unsigned* long proto_down)
625	{
626	return dev_change_proto_down(dev, proto_down: (bool)proto_down);
627	}
628
629	static ssize_t proto_down_store(struct device *dev,
630	struct device_attribute *attr,
631	const char *buf, size_t len)
632	{
633	return netdev_store(dev, attr, buf, len, set: change_proto_down);
634	}
635	NETDEVICE_SHOW_RW(proto_down, fmt_dec);
636
637	static ssize_t phys_port_id_show(struct device *dev,
638	struct device_attribute attr, char* *buf)
639	{
640	struct net_device *netdev = to_net_dev(dev);
641	struct netdev_phys_item_id ppid;
642	ssize_t ret;
643
644	ret = sysfs_rtnl_lock(kobj: &dev->kobj, attr: &attr->attr, ndev: netdev);
645	if (ret)
646	return ret;
647
648	ret = dev_get_phys_port_id(dev: netdev, ppid: &ppid);
649	if (!ret)
650	ret = sysfs_emit(buf, fmt: "%*phN\n", ppid.id_len, ppid.id);
651
652	rtnl_unlock();
653
654	return ret;
655	}
656	static DEVICE_ATTR_RO(phys_port_id);
657
658	static ssize_t phys_port_name_show(struct device *dev,
659	struct device_attribute attr, char* *buf)
660	{
661	struct net_device *netdev = to_net_dev(dev);
662	char name[IFNAMSIZ];
663	ssize_t ret;
664
665	ret = sysfs_rtnl_lock(kobj: &dev->kobj, attr: &attr->attr, ndev: netdev);
666	if (ret)
667	return ret;
668
669	ret = dev_get_phys_port_name(dev: netdev, name, len: sizeof(name));
670	if (!ret)
671	ret = sysfs_emit(buf, fmt: "%s\n", name);
672
673	rtnl_unlock();
674
675	return ret;
676	}
677	static DEVICE_ATTR_RO(phys_port_name);
678
679	static ssize_t phys_switch_id_show(struct device *dev,
680	struct device_attribute attr, char* *buf)
681	{
682	struct net_device *netdev = to_net_dev(dev);
683	struct netdev_phys_item_id ppid = { };
684	ssize_t ret;
685
686	ret = sysfs_rtnl_lock(kobj: &dev->kobj, attr: &attr->attr, ndev: netdev);
687	if (ret)
688	return ret;
689
690	ret = netif_get_port_parent_id(dev: netdev, ppid: &ppid, recurse: false);
691	if (!ret)
692	ret = sysfs_emit(buf, fmt: "%*phN\n", ppid.id_len, ppid.id);
693
694	rtnl_unlock();
695
696	return ret;
697	}
698	static DEVICE_ATTR_RO(phys_switch_id);
699
700	static struct attribute *netdev_phys_attrs[] __ro_after_init = {
701	&dev_attr_phys_port_id.attr,
702	&dev_attr_phys_port_name.attr,
703	&dev_attr_phys_switch_id.attr,
704	NULL,
705	};
706
707	static umode_t netdev_phys_is_visible(struct kobject *kobj,
708	struct attribute attr, int* index)
709	{
710	struct device *dev = kobj_to_dev(kobj);
711	struct net_device *netdev = to_net_dev(dev);
712
713	if (attr == &dev_attr_phys_port_id.attr) {
714	if (!netdev->netdev_ops->ndo_get_phys_port_id)
715	return `0`;
716	} else if (attr == &dev_attr_phys_port_name.attr) {
717	if (!netdev->netdev_ops->ndo_get_phys_port_name &&
718	!netdev->devlink_port)
719	return `0`;
720	} else if (attr == &dev_attr_phys_switch_id.attr) {
721	if (!netdev->netdev_ops->ndo_get_port_parent_id &&
722	!netdev->devlink_port)
723	return `0`;
724	}
725
726	return attr->mode;
727	}
728
729	static const struct attribute_group netdev_phys_group = {
730	.attrs = netdev_phys_attrs,
731	.is_visible = netdev_phys_is_visible,
732	};
733
734	static ssize_t threaded_show(struct device *dev,
735	struct device_attribute attr, char* *buf)
736	{
737	struct net_device *netdev = to_net_dev(dev);
738	ssize_t ret = -EINVAL;
739
740	rcu_read_lock();
741
742	if (dev_isalive(dev: netdev))
743	ret = sysfs_emit(buf, fmt: fmt_dec, READ_ONCE(netdev->threaded));
744
745	rcu_read_unlock();
746
747	return ret;
748	}
749
750	static int modify_napi_threaded(struct net_device dev, unsigned* long val)
751	{
752	int ret;
753
754	if (list_empty(head: &dev->napi_list))
755	return -EOPNOTSUPP;
756
757	if (val != `0` && val != `1`)
758	return -EOPNOTSUPP;
759
760	ret = netif_set_threaded(dev, threaded: val);
761
762	return ret;
763	}
764
765	static ssize_t threaded_store(struct device *dev,
766	struct device_attribute *attr,
767	const char *buf, size_t len)
768	{
769	return netdev_lock_store(dev, attr, buf, len, set: modify_napi_threaded);
770	}
771	static DEVICE_ATTR_RW(threaded);
772
773	static struct attribute *net_class_attrs[] __ro_after_init = {
774	&dev_attr_netdev_group.attr,
775	&dev_attr_type.attr,
776	&dev_attr_dev_id.attr,
777	&dev_attr_dev_port.attr,
778	&dev_attr_iflink.attr,
779	&dev_attr_ifindex.attr,
780	&dev_attr_name_assign_type.attr,
781	&dev_attr_addr_assign_type.attr,
782	&dev_attr_addr_len.attr,
783	&dev_attr_link_mode.attr,
784	&dev_attr_address.attr,
785	&dev_attr_broadcast.attr,
786	&dev_attr_speed.attr,
787	&dev_attr_duplex.attr,
788	&dev_attr_dormant.attr,
789	&dev_attr_testing.attr,
790	&dev_attr_operstate.attr,
791	&dev_attr_carrier_changes.attr,
792	&dev_attr_ifalias.attr,
793	&dev_attr_carrier.attr,
794	&dev_attr_mtu.attr,
795	&dev_attr_flags.attr,
796	&dev_attr_tx_queue_len.attr,
797	&dev_attr_gro_flush_timeout.attr,
798	&dev_attr_napi_defer_hard_irqs.attr,
799	&dev_attr_proto_down.attr,
800	&dev_attr_carrier_up_count.attr,
801	&dev_attr_carrier_down_count.attr,
802	&dev_attr_threaded.attr,
803	NULL,
804	};
805	ATTRIBUTE_GROUPS(net_class);
806
807	/ Show a given an attribute in the statistics group /
808	static ssize_t netstat_show(const struct device *d,
809	struct device_attribute attr, char* *buf,
810	unsigned long offset)
811	{
812	struct net_device *dev = to_net_dev(d);
813	ssize_t ret = -EINVAL;
814
815	WARN_ON(offset > sizeof(struct rtnl_link_stats64) \|\|
816	offset % sizeof(u64) != `0`);
817
818	rcu_read_lock();
819	if (dev_isalive(dev)) {
820	struct rtnl_link_stats64 temp;
821	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, storage: &temp);
822
823	ret = sysfs_emit(buf, fmt: fmt_u64, (u64 )(((u8 *)stats) + offset));
824	}
825	rcu_read_unlock();
826	return ret;
827	}
828
/* generate a read-only statistics attribute
 *
 * Expands to <name>_show(), which reads the rtnl_link_stats64 member @name
 * through netstat_show(), and the matching read-only device attribute.
 */
#define NETSTAT_ENTRY(name)						\
static ssize_t name##_show(struct device *d,				\
			   struct device_attribute *attr, char *buf)	\
{									\
	return netstat_show(d, attr, buf,				\
			    offsetof(struct rtnl_link_stats64, name));	\
}									\
static DEVICE_ATTR_RO(name)
838
/* One read-only attribute per exported rtnl_link_stats64 counter */
NETSTAT_ENTRY(rx_packets);
NETSTAT_ENTRY(tx_packets);
NETSTAT_ENTRY(rx_bytes);
NETSTAT_ENTRY(tx_bytes);
NETSTAT_ENTRY(rx_errors);
NETSTAT_ENTRY(tx_errors);
NETSTAT_ENTRY(rx_dropped);
NETSTAT_ENTRY(tx_dropped);
NETSTAT_ENTRY(multicast);
NETSTAT_ENTRY(collisions);
NETSTAT_ENTRY(rx_length_errors);
NETSTAT_ENTRY(rx_over_errors);
NETSTAT_ENTRY(rx_crc_errors);
NETSTAT_ENTRY(rx_frame_errors);
NETSTAT_ENTRY(rx_fifo_errors);
NETSTAT_ENTRY(rx_missed_errors);
NETSTAT_ENTRY(tx_aborted_errors);
NETSTAT_ENTRY(tx_carrier_errors);
NETSTAT_ENTRY(tx_fifo_errors);
NETSTAT_ENTRY(tx_heartbeat_errors);
NETSTAT_ENTRY(tx_window_errors);
NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);
NETSTAT_ENTRY(rx_nohandler);
863
864	static struct attribute *netstat_attrs[] __ro_after_init = {
865	&dev_attr_rx_packets.attr,
866	&dev_attr_tx_packets.attr,
867	&dev_attr_rx_bytes.attr,
868	&dev_attr_tx_bytes.attr,
869	&dev_attr_rx_errors.attr,
870	&dev_attr_tx_errors.attr,
871	&dev_attr_rx_dropped.attr,
872	&dev_attr_tx_dropped.attr,
873	&dev_attr_multicast.attr,
874	&dev_attr_collisions.attr,
875	&dev_attr_rx_length_errors.attr,
876	&dev_attr_rx_over_errors.attr,
877	&dev_attr_rx_crc_errors.attr,
878	&dev_attr_rx_frame_errors.attr,
879	&dev_attr_rx_fifo_errors.attr,
880	&dev_attr_rx_missed_errors.attr,
881	&dev_attr_tx_aborted_errors.attr,
882	&dev_attr_tx_carrier_errors.attr,
883	&dev_attr_tx_fifo_errors.attr,
884	&dev_attr_tx_heartbeat_errors.attr,
885	&dev_attr_tx_window_errors.attr,
886	&dev_attr_rx_compressed.attr,
887	&dev_attr_tx_compressed.attr,
888	&dev_attr_rx_nohandler.attr,
889	NULL
890	};
891
892	static const struct attribute_group netstat_group = {
893	.name = "statistics",
894	.attrs = netstat_attrs,
895	};
896
897	static struct attribute *wireless_attrs[] = {
898	NULL
899	};
900
901	static const struct attribute_group wireless_group = {
902	.name = "wireless",
903	.attrs = wireless_attrs,
904	};
905
906	static bool wireless_group_needed(struct net_device *ndev)
907	{
908	#if IS_ENABLED(CONFIG_CFG80211)
909	if (ndev->ieee80211_ptr)
910	return true;
911	#endif
912	#if IS_ENABLED(CONFIG_WIRELESS_EXT)
913	if (ndev->wireless_handlers)
914	return true;
915	#endif
916	return false;
917	}
918
919	#else /* CONFIG_SYSFS */
920	#define net_class_groups NULL
921	#endif /* CONFIG_SYSFS */
922
923	#ifdef CONFIG_SYSFS
924	#define to_rx_queue_attr(_attr) \
925	container_of(_attr, struct rx_queue_attribute, attr)
926
927	#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
928
929	static ssize_t rx_queue_attr_show(struct kobject kobj, struct* attribute *attr,
930	char *buf)
931	{
932	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
933	struct netdev_rx_queue *queue = to_rx_queue(kobj);
934
935	if (!attribute->show)
936	return -EIO;
937
938	return attribute->show(queue, buf);
939	}
940
941	static ssize_t rx_queue_attr_store(struct kobject kobj, struct* attribute *attr,
942	const char *buf, size_t count)
943	{
944	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
945	struct netdev_rx_queue *queue = to_rx_queue(kobj);
946
947	if (!attribute->store)
948	return -EIO;
949
950	return attribute->store(queue, buf, count);
951	}
952
953	static const struct sysfs_ops rx_queue_sysfs_ops = {
954	.show = rx_queue_attr_show,
955	.store = rx_queue_attr_store,
956	};
957
958	#ifdef CONFIG_RPS
959	static ssize_t show_rps_map(struct netdev_rx_queue queue, char* *buf)
960	{
961	struct rps_map *map;
962	cpumask_var_t mask;
963	int i, len;
964
965	if (!zalloc_cpumask_var(mask: &mask, GFP_KERNEL))
966	return -ENOMEM;
967
968	rcu_read_lock();
969	map = rcu_dereference(queue->rps_map);
970	if (map)
971	for (i = `0`; i < map->len; i++)
972	cpumask_set_cpu(cpu: map->cpus[i], dstp: mask);
973
974	len = sysfs_emit(buf, fmt: "%*pb\n", cpumask_pr_args(mask));
975	rcu_read_unlock();
976	free_cpumask_var(mask);
977
978	return len < PAGE_SIZE ? len : -EINVAL;
979	}
980
981	static int netdev_rx_queue_set_rps_mask(struct netdev_rx_queue *queue,
982	cpumask_var_t mask)
983	{
984	static DEFINE_MUTEX(rps_map_mutex);
985	struct rps_map old_map, map;
986	int cpu, i;
987
988	map = kzalloc(max_t(unsigned int,
989	RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
990	GFP_KERNEL);
991	if (!map)
992	return -ENOMEM;
993
994	i = `0`;
995	for_each_cpu_and(cpu, mask, cpu_online_mask)
996	map->cpus[i++] = cpu;
997
998	if (i) {
999	map->len = i;
1000	} else {
1001	kfree(objp: map);
1002	map = NULL;
1003	}
1004
1005	mutex_lock(lock: &rps_map_mutex);
1006	old_map = rcu_dereference_protected(queue->rps_map,
1007	mutex_is_locked(&rps_map_mutex));
1008	rcu_assign_pointer(queue->rps_map, map);
1009
1010	if (map)
1011	static_branch_inc(&rps_needed);
1012	if (old_map)
1013	static_branch_dec(&rps_needed);
1014
1015	mutex_unlock(lock: &rps_map_mutex);
1016
1017	if (old_map)
1018	kfree_rcu(old_map, rcu);
1019	return `0`;
1020	}
1021
1022	int rps_cpumask_housekeeping(struct cpumask *mask)
1023	{
1024	if (!cpumask_empty(srcp: mask)) {
1025	cpumask_and(dstp: mask, src1p: mask, src2p: housekeeping_cpumask(type: HK_TYPE_DOMAIN));
1026	cpumask_and(dstp: mask, src1p: mask, src2p: housekeeping_cpumask(type: HK_TYPE_WQ));
1027	if (cpumask_empty(srcp: mask))
1028	return -EINVAL;
1029	}
1030	return `0`;
1031	}
1032
1033	static ssize_t store_rps_map(struct netdev_rx_queue *queue,
1034	const char *buf, size_t len)
1035	{
1036	cpumask_var_t mask;
1037	int err;
1038
1039	if (!capable(CAP_NET_ADMIN))
1040	return -EPERM;
1041
1042	if (!alloc_cpumask_var(mask: &mask, GFP_KERNEL))
1043	return -ENOMEM;
1044
1045	err = bitmap_parse(buf, buflen: len, cpumask_bits(mask), nr_cpumask_bits);
1046	if (err)
1047	goto out;
1048
1049	err = rps_cpumask_housekeeping(mask);
1050	if (err)
1051	goto out;
1052
1053	err = netdev_rx_queue_set_rps_mask(queue, mask);
1054
1055	out:
1056	free_cpumask_var(mask);
1057	return err ? : len;
1058	}
1059
1060	static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
1061	char *buf)
1062	{
1063	struct rps_dev_flow_table *flow_table;
1064	unsigned long val = `0`;
1065
1066	rcu_read_lock();
1067	flow_table = rcu_dereference(queue->rps_flow_table);
1068	if (flow_table)
1069	val = `1UL` << flow_table->log;
1070	rcu_read_unlock();
1071
1072	return sysfs_emit(buf, fmt: "%lu\n", val);
1073	}
1074
1075	static void rps_dev_flow_table_release(struct rcu_head *rcu)
1076	{
1077	struct rps_dev_flow_table *table = container_of(rcu,
1078	struct rps_dev_flow_table, rcu);
1079	vfree(addr: table);
1080	}
1081
1082	static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
1083	const char *buf, size_t len)
1084	{
1085	unsigned long mask, count;
1086	struct rps_dev_flow_table table, old_table;
1087	static DEFINE_SPINLOCK(rps_dev_flow_lock);
1088	int rc;
1089
1090	if (!capable(CAP_NET_ADMIN))
1091	return -EPERM;
1092
1093	rc = kstrtoul(s: buf, base: `0`, res: &count);
1094	if (rc < `0`)
1095	return rc;
1096
1097	if (count) {
1098	mask = count - `1`;
1099	/ mask = roundup_pow_of_two(count) - 1;*
1100	* without overflows...
1101	*/
1102	while ((mask \| (mask >> `1`)) != mask)
1103	mask \|= (mask >> `1`);
1104	/ On 64 bit arches, must check mask fits in table->mask (u32),*
1105	* and on 32bit arches, must check
1106	* RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow.
1107	*/
1108	#if BITS_PER_LONG > 32
1109	if (mask > (unsigned long)(u32)mask)
1110	return -EINVAL;
1111	#else
1112	if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(`1`))
1113	/ sizeof(struct rps_dev_flow)) {
1114	/ Enforce a limit to prevent overflow /
1115	return -EINVAL;
1116	}
1117	#endif
1118	table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + `1`));
1119	if (!table)
1120	return -ENOMEM;
1121
1122	table->log = ilog2(mask) + `1`;
1123	for (count = `0`; count <= mask; count++) {
1124	table->flows[count].cpu = RPS_NO_CPU;
1125	table->flows[count].filter = RPS_NO_FILTER;
1126	}
1127	} else {
1128	table = NULL;
1129	}
1130
1131	spin_lock(lock: &rps_dev_flow_lock);
1132	old_table = rcu_dereference_protected(queue->rps_flow_table,
1133	lockdep_is_held(&rps_dev_flow_lock));
1134	rcu_assign_pointer(queue->rps_flow_table, table);
1135	spin_unlock(lock: &rps_dev_flow_lock);
1136
1137	if (old_table)
1138	call_rcu(head: &old_table->rcu, func: rps_dev_flow_table_release);
1139
1140	return len;
1141	}
1142
1143	static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
1144	= __ATTR(rps_cpus, `0644`, show_rps_map, store_rps_map);
1145
1146	static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
1147	= __ATTR(rps_flow_cnt, `0644`,
1148	show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
1149	#endif /* CONFIG_RPS */
1150
1151	static struct attribute *rx_queue_default_attrs[] __ro_after_init = {
1152	#ifdef CONFIG_RPS
1153	&rps_cpus_attribute.attr,
1154	&rps_dev_flow_table_cnt_attribute.attr,
1155	#endif
1156	NULL
1157	};
1158	ATTRIBUTE_GROUPS(rx_queue_default);
1159
1160	static void rx_queue_release(struct kobject *kobj)
1161	{
1162	struct netdev_rx_queue *queue = to_rx_queue(kobj);
1163	#ifdef CONFIG_RPS
1164	struct rps_map *map;
1165	struct rps_dev_flow_table *flow_table;
1166
1167	map = rcu_dereference_protected(queue->rps_map, `1`);
1168	if (map) {
1169	RCU_INIT_POINTER(queue->rps_map, NULL);
1170	kfree_rcu(map, rcu);
1171	}
1172
1173	flow_table = rcu_dereference_protected(queue->rps_flow_table, `1`);
1174	if (flow_table) {
1175	RCU_INIT_POINTER(queue->rps_flow_table, NULL);
1176	call_rcu(head: &flow_table->rcu, func: rps_dev_flow_table_release);
1177	}
1178	#endif
1179
1180	memset(s: kobj, c: `0`, n: sizeof(*kobj));
1181	netdev_put(dev: queue->dev, tracker: &queue->dev_tracker);
1182	}
1183
1184	static const void rx_queue_namespace(const* struct kobject *kobj)
1185	{
1186	struct netdev_rx_queue *queue = to_rx_queue(kobj);
1187	struct device *dev = &queue->dev->dev;
1188	const void *ns = NULL;
1189
1190	if (dev->class && dev->class->namespace)
1191	ns = dev->class->namespace(dev);
1192
1193	return ns;
1194	}
1195
1196	static void rx_queue_get_ownership(const struct kobject *kobj,
1197	kuid_t uid, kgid_t gid)
1198	{
1199	const struct net *net = rx_queue_namespace(kobj);
1200
1201	net_ns_get_ownership(net, uid, gid);
1202	}
1203
1204	static const struct kobj_type rx_queue_ktype = {
1205	.sysfs_ops = &rx_queue_sysfs_ops,
1206	.release = rx_queue_release,
1207	.namespace = rx_queue_namespace,
1208	.get_ownership = rx_queue_get_ownership,
1209	};
1210
1211	static int rx_queue_default_mask(struct net_device *dev,
1212	struct netdev_rx_queue *queue)
1213	{
1214	#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
1215	struct cpumask *rps_default_mask;
1216	int res = `0`;
1217
1218	mutex_lock(lock: &rps_default_mask_mutex);
1219
1220	rps_default_mask = dev_net(dev)->core.rps_default_mask;
1221	if (rps_default_mask && !cpumask_empty(srcp: rps_default_mask))
1222	res = netdev_rx_queue_set_rps_mask(queue, mask: rps_default_mask);
1223
1224	mutex_unlock(lock: &rps_default_mask_mutex);
1225
1226	return res;
1227	#else
1228	return `0`;
1229	#endif
1230	}
1231
1232	static int rx_queue_add_kobject(struct net_device dev, int* index)
1233	{
1234	struct netdev_rx_queue *queue = dev->_rx + index;
1235	struct kobject *kobj = &queue->kobj;
1236	int error = `0`;
1237
1238	/ Rx queues are cleared in rx_queue_release to allow later*
1239	* re-registration. This is triggered when their kobj refcount is
1240	* dropped.
1241	*
1242	* If a queue is removed while both a read (or write) operation and a
1243	* the re-addition of the same queue are pending (waiting on rntl_lock)
1244	* it might happen that the re-addition will execute before the read,
1245	* making the initial removal to never happen (queue's kobj refcount
1246	* won't drop enough because of the pending read). In such rare case,
1247	* return to allow the removal operation to complete.
1248	*/
1249	if (unlikely(kobj->state_initialized)) {
1250	netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed");
1251	return -EAGAIN;
1252	}
1253
1254	/ Kobject_put later will trigger rx_queue_release call which*
1255	* decreases dev refcount: Take that reference here
1256	*/
1257	netdev_hold(dev: queue->dev, tracker: &queue->dev_tracker, GFP_KERNEL);
1258
1259	kobj->kset = dev->queues_kset;
1260	error = kobject_init_and_add(kobj, ktype: &rx_queue_ktype, NULL,
1261	fmt: "rx-%u", index);
1262	if (error)
1263	goto err;
1264
1265	queue->groups = rx_queue_default_groups;
1266	error = sysfs_create_groups(kobj, groups: queue->groups);
1267	if (error)
1268	goto err;
1269
1270	if (dev->sysfs_rx_queue_group) {
1271	error = sysfs_create_group(kobj, grp: dev->sysfs_rx_queue_group);
1272	if (error)
1273	goto err_default_groups;
1274	}
1275
1276	error = rx_queue_default_mask(dev, queue);
1277	if (error)
1278	goto err_default_groups;
1279
1280	kobject_uevent(kobj, action: KOBJ_ADD);
1281
1282	return error;
1283
1284	err_default_groups:
1285	sysfs_remove_groups(kobj, groups: queue->groups);
1286	err:
1287	kobject_put(kobj);
1288	return error;
1289	}
1290
1291	static int rx_queue_change_owner(struct net_device dev, int* index, kuid_t kuid,
1292	kgid_t kgid)
1293	{
1294	struct netdev_rx_queue *queue = dev->_rx + index;
1295	struct kobject *kobj = &queue->kobj;
1296	int error;
1297
1298	error = sysfs_change_owner(kobj, kuid, kgid);
1299	if (error)
1300	return error;
1301
1302	if (dev->sysfs_rx_queue_group)
1303	error = sysfs_group_change_owner(
1304	kobj, groups: dev->sysfs_rx_queue_group, kuid, kgid);
1305
1306	return error;
1307	}
1308	#endif /* CONFIG_SYSFS */
1309
/*
 * Grow or shrink the set of registered rx queue kobjects of @dev from
 * @old_num to @new_num.
 *
 * Queues [old_num, new_num) are added; if one addition fails, the queues
 * added so far are removed again and the error is returned. When
 * shrinking, queues [new_num, old_num) are removed. Without CONFIG_SYSFS
 * this is a no-op, and without CONFIG_RPS it only acts when the device
 * provides its own rx queue group.
 */
int
net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

#ifndef CONFIG_RPS
	if (!dev->sysfs_rx_queue_group)
		return 0;
#endif
	for (i = old_num; i < new_num; i++) {
		error = rx_queue_add_kobject(dev, i);
		if (error) {
			/* Roll back: remove everything added by this call. */
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct netdev_rx_queue *queue = &dev->_rx[i];
		struct kobject *kobj = &queue->kobj;

		if (!check_net(dev_net(dev)))
			kobj->uevent_suppress = 1;
		if (dev->sysfs_rx_queue_group)
			sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
		sysfs_remove_groups(kobj, queue->groups);
		kobject_put(kobj);
	}

	return error;
#else
	return 0;
#endif
}
1346
1347	static int net_rx_queue_change_owner(struct net_device dev, int* num,
1348	kuid_t kuid, kgid_t kgid)
1349	{
1350	#ifdef CONFIG_SYSFS
1351	int error = `0`;
1352	int i;
1353
1354	#ifndef CONFIG_RPS
1355	if (!dev->sysfs_rx_queue_group)
1356	return `0`;
1357	#endif
1358	for (i = `0`; i < num; i++) {
1359	error = rx_queue_change_owner(dev, index: i, kuid, kgid);
1360	if (error)
1361	break;
1362	}
1363
1364	return error;
1365	#else
1366	return `0`;
1367	#endif
1368	}
1369
1370	#ifdef CONFIG_SYSFS
1371	/*
1372	* netdev_queue sysfs structures and functions.
1373	*/
1374	struct netdev_queue_attribute {
1375	struct attribute attr;
1376	ssize_t (show)(struct* kobject kobj, struct* attribute *attr,
1377	struct netdev_queue queue, char* *buf);
1378	ssize_t (store)(struct* kobject kobj, struct* attribute *attr,
1379	struct netdev_queue queue, const* char *buf,
1380	size_t len);
1381	};
1382	#define to_netdev_queue_attr(_attr) \
1383	container_of(_attr, struct netdev_queue_attribute, attr)
1384
1385	#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
1386
1387	static ssize_t netdev_queue_attr_show(struct kobject *kobj,
1388	struct attribute attr, char* *buf)
1389	{
1390	const struct netdev_queue_attribute *attribute
1391	= to_netdev_queue_attr(attr);
1392	struct netdev_queue *queue = to_netdev_queue(kobj);
1393
1394	if (!attribute->show)
1395	return -EIO;
1396
1397	return attribute->show(kobj, attr, queue, buf);
1398	}
1399
1400	static ssize_t netdev_queue_attr_store(struct kobject *kobj,
1401	struct attribute *attr,
1402	const char *buf, size_t count)
1403	{
1404	const struct netdev_queue_attribute *attribute
1405	= to_netdev_queue_attr(attr);
1406	struct netdev_queue *queue = to_netdev_queue(kobj);
1407
1408	if (!attribute->store)
1409	return -EIO;
1410
1411	return attribute->store(kobj, attr, queue, buf, count);
1412	}
1413
1414	static const struct sysfs_ops netdev_queue_sysfs_ops = {
1415	.show = netdev_queue_attr_show,
1416	.store = netdev_queue_attr_store,
1417	};
1418
1419	static ssize_t tx_timeout_show(struct kobject kobj, struct* attribute *attr,
1420	struct netdev_queue queue, char* *buf)
1421	{
1422	unsigned long trans_timeout = atomic_long_read(v: &queue->trans_timeout);
1423
1424	return sysfs_emit(buf, fmt: fmt_ulong, trans_timeout);
1425	}
1426
1427	static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
1428	{
1429	struct net_device *dev = queue->dev;
1430	unsigned int i;
1431
1432	i = queue - dev->_tx;
1433	BUG_ON(i >= dev->num_tx_queues);
1434
1435	return i;
1436	}
1437
1438	static ssize_t traffic_class_show(struct kobject kobj, struct* attribute *attr,
1439	struct netdev_queue queue, char* *buf)
1440	{
1441	struct net_device *dev = queue->dev;
1442	int num_tc, tc, index, ret;
1443
1444	if (!netif_is_multiqueue(dev))
1445	return -ENOENT;
1446
1447	ret = sysfs_rtnl_lock(kobj, attr, ndev: queue->dev);
1448	if (ret)
1449	return ret;
1450
1451	index = get_netdev_queue_index(queue);
1452
1453	/ If queue belongs to subordinate dev use its TC mapping /
1454	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
1455
1456	num_tc = dev->num_tc;
1457	tc = netdev_txq_to_tc(dev, txq: index);
1458
1459	rtnl_unlock();
1460
1461	if (tc < `0`)
1462	return -EINVAL;
1463
1464	/ We can report the traffic class one of two ways:*
1465	* Subordinate device traffic classes are reported with the traffic
1466	* class first, and then the subordinate class so for example TC0 on
1467	* subordinate device 2 will be reported as "0-2". If the queue
1468	* belongs to the root device it will be reported with just the
1469	* traffic class, so just "0" for TC 0 for example.
1470	*/
1471	return num_tc < `0` ? sysfs_emit(buf, fmt: "%d%d\n", tc, num_tc) :
1472	sysfs_emit(buf, fmt: "%d\n", tc);
1473	}
1474
1475	#ifdef CONFIG_XPS
1476	static ssize_t tx_maxrate_show(struct kobject kobj, struct* attribute *attr,
1477	struct netdev_queue queue, char* *buf)
1478	{
1479	return sysfs_emit(buf, fmt: "%lu\n", queue->tx_maxrate);
1480	}
1481
1482	static ssize_t tx_maxrate_store(struct kobject kobj, struct* attribute *attr,
1483	struct netdev_queue queue, const* char *buf,
1484	size_t len)
1485	{
1486	int err, index = get_netdev_queue_index(queue);
1487	struct net_device *dev = queue->dev;
1488	u32 rate = `0`;
1489
1490	if (!capable(CAP_NET_ADMIN))
1491	return -EPERM;
1492
1493	/ The check is also done later; this helps returning early without*
1494	* hitting the locking section below.
1495	*/
1496	if (!dev->netdev_ops->ndo_set_tx_maxrate)
1497	return -EOPNOTSUPP;
1498
1499	err = kstrtou32(s: buf, base: `10`, res: &rate);
1500	if (err < `0`)
1501	return err;
1502
1503	err = sysfs_rtnl_lock(kobj, attr, ndev: dev);
1504	if (err)
1505	return err;
1506
1507	err = -EOPNOTSUPP;
1508	netdev_lock_ops(dev);
1509	if (dev->netdev_ops->ndo_set_tx_maxrate)
1510	err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
1511	netdev_unlock_ops(dev);
1512
1513	if (!err) {
1514	queue->tx_maxrate = rate;
1515	rtnl_unlock();
1516	return len;
1517	}
1518
1519	rtnl_unlock();
1520	return err;
1521	}
1522
1523	static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init
1524	= __ATTR_RW(tx_maxrate);
1525	#endif
1526
1527	static struct netdev_queue_attribute queue_trans_timeout __ro_after_init
1528	= __ATTR_RO(tx_timeout);
1529
1530	static struct netdev_queue_attribute queue_traffic_class __ro_after_init
1531	= __ATTR_RO(traffic_class);
1532
1533	#ifdef CONFIG_BQL
1534	/*
1535	* Byte queue limits sysfs structures and functions.
1536	*/
1537	static ssize_t bql_show(char buf, unsigned* int value)
1538	{
1539	return sysfs_emit(buf, fmt: "%u\n", value);
1540	}
1541
1542	static ssize_t bql_set(const char buf, const* size_t count,
1543	unsigned int *pvalue)
1544	{
1545	unsigned int value;
1546	int err;
1547
1548	if (!strcmp(buf, "max") \|\| !strcmp(buf, "max\n")) {
1549	value = DQL_MAX_LIMIT;
1550	} else {
1551	err = kstrtouint(s: buf, base: `10`, res: &value);
1552	if (err < `0`)
1553	return err;
1554	if (value > DQL_MAX_LIMIT)
1555	return -EINVAL;
1556	}
1557
1558	*pvalue = value;
1559
1560	return count;
1561	}
1562
1563	static ssize_t bql_show_hold_time(struct kobject kobj, struct* attribute *attr,
1564	struct netdev_queue queue, char* *buf)
1565	{
1566	struct dql *dql = &queue->dql;
1567
1568	return sysfs_emit(buf, fmt: "%u\n", jiffies_to_msecs(j: dql->slack_hold_time));
1569	}
1570
1571	static ssize_t bql_set_hold_time(struct kobject kobj, struct* attribute *attr,
1572	struct netdev_queue queue, const* char *buf,
1573	size_t len)
1574	{
1575	struct dql *dql = &queue->dql;
1576	unsigned int value;
1577	int err;
1578
1579	err = kstrtouint(s: buf, base: `10`, res: &value);
1580	if (err < `0`)
1581	return err;
1582
1583	dql->slack_hold_time = msecs_to_jiffies(m: value);
1584
1585	return len;
1586	}
1587
1588	static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
1589	= __ATTR(hold_time, `0644`,
1590	bql_show_hold_time, bql_set_hold_time);
1591
1592	static ssize_t bql_show_stall_thrs(struct kobject kobj, struct* attribute *attr,
1593	struct netdev_queue queue, char* *buf)
1594	{
1595	struct dql *dql = &queue->dql;
1596
1597	return sysfs_emit(buf, fmt: "%u\n", jiffies_to_msecs(j: dql->stall_thrs));
1598	}
1599
1600	static ssize_t bql_set_stall_thrs(struct kobject kobj, struct* attribute *attr,
1601	struct netdev_queue queue, const* char *buf,
1602	size_t len)
1603	{
1604	struct dql *dql = &queue->dql;
1605	unsigned int value;
1606	int err;
1607
1608	err = kstrtouint(s: buf, base: `10`, res: &value);
1609	if (err < `0`)
1610	return err;
1611
1612	value = msecs_to_jiffies(m: value);
1613	if (value && (value < `4` \|\| value > `4` / `2` * BITS_PER_LONG))
1614	return -ERANGE;
1615
1616	if (!dql->stall_thrs && value)
1617	dql->last_reap = jiffies;
1618	/ Force last_reap to be live /
1619	smp_wmb();
1620	dql->stall_thrs = value;
1621
1622	return len;
1623	}
1624
1625	static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
1626	__ATTR(stall_thrs, `0644`, bql_show_stall_thrs, bql_set_stall_thrs);
1627
1628	static ssize_t bql_show_stall_max(struct kobject kobj, struct* attribute *attr,
1629	struct netdev_queue queue, char* *buf)
1630	{
1631	return sysfs_emit(buf, fmt: "%u\n", READ_ONCE(queue->dql.stall_max));
1632	}
1633
1634	static ssize_t bql_set_stall_max(struct kobject kobj, struct* attribute *attr,
1635	struct netdev_queue queue, const* char *buf,
1636	size_t len)
1637	{
1638	WRITE_ONCE(queue->dql.stall_max, `0`);
1639	return len;
1640	}
1641
1642	static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
1643	__ATTR(stall_max, `0644`, bql_show_stall_max, bql_set_stall_max);
1644
1645	static ssize_t bql_show_stall_cnt(struct kobject kobj, struct* attribute *attr,
1646	struct netdev_queue queue, char* *buf)
1647	{
1648	struct dql *dql = &queue->dql;
1649
1650	return sysfs_emit(buf, fmt: "%lu\n", dql->stall_cnt);
1651	}
1652
1653	static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
1654	__ATTR(stall_cnt, `0444`, bql_show_stall_cnt, NULL);
1655
1656	static ssize_t bql_show_inflight(struct kobject kobj, struct* attribute *attr,
1657	struct netdev_queue queue, char* *buf)
1658	{
1659	struct dql *dql = &queue->dql;
1660
1661	return sysfs_emit(buf, fmt: "%u\n", dql->num_queued - dql->num_completed);
1662	}
1663
1664	static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
1665	__ATTR(inflight, `0444`, bql_show_inflight, NULL);
1666
1667	#define BQL_ATTR(NAME, FIELD) \
1668	static ssize_t bql_show_ ## NAME(struct kobject *kobj, \
1669	struct attribute *attr, \
1670	struct netdev_queue queue, char buf) \
1671	{ \
1672	return bql_show(buf, queue->dql.FIELD); \
1673	} \
1674	\
1675	static ssize_t bql_set_ ## NAME(struct kobject *kobj, \
1676	struct attribute *attr, \
1677	struct netdev_queue *queue, \
1678	const char *buf, size_t len) \
1679	{ \
1680	return bql_set(buf, len, &queue->dql.FIELD); \
1681	} \
1682	\
1683	static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
1684	= __ATTR(NAME, 0644, \
1685	bql_show_ ## NAME, bql_set_ ## NAME)
1686
1687	BQL_ATTR(limit, limit);
1688	BQL_ATTR(limit_max, max_limit);
1689	BQL_ATTR(limit_min, min_limit);
1690
1691	static struct attribute *dql_attrs[] __ro_after_init = {
1692	&bql_limit_attribute.attr,
1693	&bql_limit_max_attribute.attr,
1694	&bql_limit_min_attribute.attr,
1695	&bql_hold_time_attribute.attr,
1696	&bql_inflight_attribute.attr,
1697	&bql_stall_thrs_attribute.attr,
1698	&bql_stall_cnt_attribute.attr,
1699	&bql_stall_max_attribute.attr,
1700	NULL
1701	};
1702
1703	static const struct attribute_group dql_group = {
1704	.name = "byte_queue_limits",
1705	.attrs = dql_attrs,
1706	};
1707	#else
1708	/ Fake declaration, all the code using it should be dead /
1709	static const struct attribute_group dql_group = {};
1710	#endif /* CONFIG_BQL */
1711
1712	#ifdef CONFIG_XPS
1713	static ssize_t xps_queue_show(struct net_device dev, unsigned* int index,
1714	int tc, char buf, enum* xps_map_type type)
1715	{
1716	struct xps_dev_maps *dev_maps;
1717	unsigned long *mask;
1718	unsigned int nr_ids;
1719	int j, len;
1720
1721	rcu_read_lock();
1722	dev_maps = rcu_dereference(dev->xps_maps[type]);
1723
1724	/ Default to nr_cpu_ids/dev->num_rx_queues and do not just return 0*
1725	* when dev_maps hasn't been allocated yet, to be backward compatible.
1726	*/
1727	nr_ids = dev_maps ? dev_maps->nr_ids :
1728	(type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues);
1729
1730	mask = bitmap_zalloc(nbits: nr_ids, GFP_NOWAIT);
1731	if (!mask) {
1732	rcu_read_unlock();
1733	return -ENOMEM;
1734	}
1735
1736	if (!dev_maps \|\| tc >= dev_maps->num_tc)
1737	goto out_no_maps;
1738
1739	for (j = `0`; j < nr_ids; j++) {
1740	int i, tci = j * dev_maps->num_tc + tc;
1741	struct xps_map *map;
1742
1743	map = rcu_dereference(dev_maps->attr_map[tci]);
1744	if (!map)
1745	continue;
1746
1747	for (i = map->len; i--;) {
1748	if (map->queues[i] == index) {
1749	__set_bit(j, mask);
1750	break;
1751	}
1752	}
1753	}
1754	out_no_maps:
1755	rcu_read_unlock();
1756
1757	len = bitmap_print_to_pagebuf(list: false, buf, maskp: mask, nmaskbits: nr_ids);
1758	bitmap_free(bitmap: mask);
1759
1760	return len < PAGE_SIZE ? len : -EINVAL;
1761	}
1762
1763	static ssize_t xps_cpus_show(struct kobject kobj, struct* attribute *attr,
1764	struct netdev_queue queue, char* *buf)
1765	{
1766	struct net_device *dev = queue->dev;
1767	unsigned int index;
1768	int len, tc, ret;
1769
1770	if (!netif_is_multiqueue(dev))
1771	return -ENOENT;
1772
1773	index = get_netdev_queue_index(queue);
1774
1775	ret = sysfs_rtnl_lock(kobj, attr, ndev: queue->dev);
1776	if (ret)
1777	return ret;
1778
1779	/ If queue belongs to subordinate dev use its map /
1780	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
1781
1782	tc = netdev_txq_to_tc(dev, txq: index);
1783	if (tc < `0`) {
1784	rtnl_unlock();
1785	return -EINVAL;
1786	}
1787
1788	/ Increase the net device refcnt to make sure it won't be freed while*
1789	* xps_queue_show is running.
1790	*/
1791	dev_hold(dev);
1792	rtnl_unlock();
1793
1794	len = xps_queue_show(dev, index, tc, buf, type: XPS_CPUS);
1795
1796	dev_put(dev);
1797	return len;
1798	}
1799
1800	static ssize_t xps_cpus_store(struct kobject kobj, struct* attribute *attr,
1801	struct netdev_queue queue, const* char *buf,
1802	size_t len)
1803	{
1804	struct net_device *dev = queue->dev;
1805	unsigned int index;
1806	cpumask_var_t mask;
1807	int err;
1808
1809	if (!netif_is_multiqueue(dev))
1810	return -ENOENT;
1811
1812	if (!capable(CAP_NET_ADMIN))
1813	return -EPERM;
1814
1815	if (!alloc_cpumask_var(mask: &mask, GFP_KERNEL))
1816	return -ENOMEM;
1817
1818	index = get_netdev_queue_index(queue);
1819
1820	err = bitmap_parse(buf, buflen: len, cpumask_bits(mask), nr_cpumask_bits);
1821	if (err) {
1822	free_cpumask_var(mask);
1823	return err;
1824	}
1825
1826	err = sysfs_rtnl_lock(kobj, attr, ndev: dev);
1827	if (err) {
1828	free_cpumask_var(mask);
1829	return err;
1830	}
1831
1832	err = netif_set_xps_queue(dev, mask, index);
1833	rtnl_unlock();
1834
1835	free_cpumask_var(mask);
1836
1837	return err ? : len;
1838	}
1839
1840	static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
1841	= __ATTR_RW(xps_cpus);
1842
1843	static ssize_t xps_rxqs_show(struct kobject kobj, struct* attribute *attr,
1844	struct netdev_queue queue, char* *buf)
1845	{
1846	struct net_device *dev = queue->dev;
1847	unsigned int index;
1848	int tc, ret;
1849
1850	index = get_netdev_queue_index(queue);
1851
1852	ret = sysfs_rtnl_lock(kobj, attr, ndev: dev);
1853	if (ret)
1854	return ret;
1855
1856	tc = netdev_txq_to_tc(dev, txq: index);
1857
1858	/ Increase the net device refcnt to make sure it won't be freed while*
1859	* xps_queue_show is running.
1860	*/
1861	dev_hold(dev);
1862	rtnl_unlock();
1863
1864	ret = tc >= `0` ? xps_queue_show(dev, index, tc, buf, type: XPS_RXQS) : -EINVAL;
1865	dev_put(dev);
1866	return ret;
1867	}
1868
1869	static ssize_t xps_rxqs_store(struct kobject kobj, struct* attribute *attr,
1870	struct netdev_queue queue, const* char *buf,
1871	size_t len)
1872	{
1873	struct net_device *dev = queue->dev;
1874	struct net *net = dev_net(dev);
1875	unsigned long *mask;
1876	unsigned int index;
1877	int err;
1878
1879	if (!ns_capable(ns: net->user_ns, CAP_NET_ADMIN))
1880	return -EPERM;
1881
1882	mask = bitmap_zalloc(nbits: dev->num_rx_queues, GFP_KERNEL);
1883	if (!mask)
1884	return -ENOMEM;
1885
1886	index = get_netdev_queue_index(queue);
1887
1888	err = bitmap_parse(buf, buflen: len, dst: mask, nbits: dev->num_rx_queues);
1889	if (err) {
1890	bitmap_free(bitmap: mask);
1891	return err;
1892	}
1893
1894	err = sysfs_rtnl_lock(kobj, attr, ndev: dev);
1895	if (err) {
1896	bitmap_free(bitmap: mask);
1897	return err;
1898	}
1899
1900	cpus_read_lock();
1901	err = __netif_set_xps_queue(dev, mask, index, type: XPS_RXQS);
1902	cpus_read_unlock();
1903
1904	rtnl_unlock();
1905
1906	bitmap_free(bitmap: mask);
1907	return err ? : len;
1908	}
1909
1910	static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
1911	= __ATTR_RW(xps_rxqs);
1912	#endif /* CONFIG_XPS */
1913
1914	static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
1915	&queue_trans_timeout.attr,
1916	&queue_traffic_class.attr,
1917	#ifdef CONFIG_XPS
1918	&xps_cpus_attribute.attr,
1919	&xps_rxqs_attribute.attr,
1920	&queue_tx_maxrate.attr,
1921	#endif
1922	NULL
1923	};
1924	ATTRIBUTE_GROUPS(netdev_queue_default);
1925
1926	static void netdev_queue_release(struct kobject *kobj)
1927	{
1928	struct netdev_queue *queue = to_netdev_queue(kobj);
1929
1930	memset(s: kobj, c: `0`, n: sizeof(*kobj));
1931	netdev_put(dev: queue->dev, tracker: &queue->dev_tracker);
1932	}
1933
1934	static const void netdev_queue_namespace(const* struct kobject *kobj)
1935	{
1936	struct netdev_queue *queue = to_netdev_queue(kobj);
1937	struct device *dev = &queue->dev->dev;
1938	const void *ns = NULL;
1939
1940	if (dev->class && dev->class->namespace)
1941	ns = dev->class->namespace(dev);
1942
1943	return ns;
1944	}
1945
1946	static void netdev_queue_get_ownership(const struct kobject *kobj,
1947	kuid_t uid, kgid_t gid)
1948	{
1949	const struct net *net = netdev_queue_namespace(kobj);
1950
1951	net_ns_get_ownership(net, uid, gid);
1952	}
1953
1954	static const struct kobj_type netdev_queue_ktype = {
1955	.sysfs_ops = &netdev_queue_sysfs_ops,
1956	.release = netdev_queue_release,
1957	.namespace = netdev_queue_namespace,
1958	.get_ownership = netdev_queue_get_ownership,
1959	};
1960
1961	static bool netdev_uses_bql(const struct net_device *dev)
1962	{
1963	if (dev->lltx \|\| (dev->priv_flags & IFF_NO_QUEUE))
1964	return false;
1965
1966	return IS_ENABLED(CONFIG_BQL);
1967	}
1968
1969	static int netdev_queue_add_kobject(struct net_device dev, int* index)
1970	{
1971	struct netdev_queue *queue = dev->_tx + index;
1972	struct kobject *kobj = &queue->kobj;
1973	int error = `0`;
1974
1975	/ Tx queues are cleared in netdev_queue_release to allow later*
1976	* re-registration. This is triggered when their kobj refcount is
1977	* dropped.
1978	*
1979	* If a queue is removed while both a read (or write) operation and a
1980	* the re-addition of the same queue are pending (waiting on rntl_lock)
1981	* it might happen that the re-addition will execute before the read,
1982	* making the initial removal to never happen (queue's kobj refcount
1983	* won't drop enough because of the pending read). In such rare case,
1984	* return to allow the removal operation to complete.
1985	*/
1986	if (unlikely(kobj->state_initialized)) {
1987	netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed");
1988	return -EAGAIN;
1989	}
1990
1991	/ Kobject_put later will trigger netdev_queue_release call*
1992	* which decreases dev refcount: Take that reference here
1993	*/
1994	netdev_hold(dev: queue->dev, tracker: &queue->dev_tracker, GFP_KERNEL);
1995
1996	kobj->kset = dev->queues_kset;
1997	error = kobject_init_and_add(kobj, ktype: &netdev_queue_ktype, NULL,
1998	fmt: "tx-%u", index);
1999	if (error)
2000	goto err;
2001
2002	queue->groups = netdev_queue_default_groups;
2003	error = sysfs_create_groups(kobj, groups: queue->groups);
2004	if (error)
2005	goto err;
2006
2007	if (netdev_uses_bql(dev)) {
2008	error = sysfs_create_group(kobj, grp: &dql_group);
2009	if (error)
2010	goto err_default_groups;
2011	}
2012
2013	kobject_uevent(kobj, action: KOBJ_ADD);
2014	return `0`;
2015
2016	err_default_groups:
2017	sysfs_remove_groups(kobj, groups: queue->groups);
2018	err:
2019	kobject_put(kobj);
2020	return error;
2021	}
2022
2023	static int tx_queue_change_owner(struct net_device ndev, int* index,
2024	kuid_t kuid, kgid_t kgid)
2025	{
2026	struct netdev_queue *queue = ndev->_tx + index;
2027	struct kobject *kobj = &queue->kobj;
2028	int error;
2029
2030	error = sysfs_change_owner(kobj, kuid, kgid);
2031	if (error)
2032	return error;
2033
2034	if (netdev_uses_bql(dev: ndev))
2035	error = sysfs_group_change_owner(kobj, groups: &dql_group, kuid, kgid);
2036
2037	return error;
2038	}
2039	#endif /* CONFIG_SYSFS */
2040
/*
 * Grow or shrink the set of registered tx queue kobjects of @dev from
 * @old_num to @new_num.
 *
 * Queues [old_num, new_num) are added; if one addition fails, the queues
 * added so far are removed again and the error is returned. When
 * shrinking, queues [new_num, old_num) are removed. Without CONFIG_SYSFS
 * this is a no-op returning 0.
 */
int
netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

	/* Tx queue kobjects are allowed to be updated when a device is being
	 * unregistered, but solely to remove queues from qdiscs. Any path
	 * adding queues should be fixed.
	 */
	WARN(dev->reg_state == NETREG_UNREGISTERING && new_num > old_num,
	     "New queues can't be registered after device unregistration.");

	for (i = old_num; i < new_num; i++) {
		error = netdev_queue_add_kobject(dev, i);
		if (error) {
			/* Roll back: remove everything added by this call. */
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct netdev_queue *queue = dev->_tx + i;

		if (!check_net(dev_net(dev)))
			queue->kobj.uevent_suppress = 1;

		if (netdev_uses_bql(dev))
			sysfs_remove_group(&queue->kobj, &dql_group);

		sysfs_remove_groups(&queue->kobj, queue->groups);
		kobject_put(&queue->kobj);
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}
2081
2082	static int net_tx_queue_change_owner(struct net_device dev, int* num,
2083	kuid_t kuid, kgid_t kgid)
2084	{
2085	#ifdef CONFIG_SYSFS
2086	int error = `0`;
2087	int i;
2088
2089	for (i = `0`; i < num; i++) {
2090	error = tx_queue_change_owner(ndev: dev, index: i, kuid, kgid);
2091	if (error)
2092	break;
2093	}
2094
2095	return error;
2096	#else
2097	return `0`;
2098	#endif /* CONFIG_SYSFS */
2099	}
2100
2101	static int register_queue_kobjects(struct net_device *dev)
2102	{
2103	int error = `0`, txq = `0`, rxq = `0`, real_rx = `0`, real_tx = `0`;
2104
2105	#ifdef CONFIG_SYSFS
2106	dev->queues_kset = kset_create_and_add(name: "queues",
2107	NULL, parent_kobj: &dev->dev.kobj);
2108	if (!dev->queues_kset)
2109	return -ENOMEM;
2110	real_rx = dev->real_num_rx_queues;
2111	#endif
2112	real_tx = dev->real_num_tx_queues;
2113
2114	error = net_rx_queue_update_kobjects(dev, old_num: `0`, new_num: real_rx);
2115	if (error)
2116	goto error;
2117	rxq = real_rx;
2118
2119	error = netdev_queue_update_kobjects(dev, old_num: `0`, new_num: real_tx);
2120	if (error)
2121	goto error;
2122	txq = real_tx;
2123
2124	return `0`;
2125
2126	error:
2127	netdev_queue_update_kobjects(dev, old_num: txq, new_num: `0`);
2128	net_rx_queue_update_kobjects(dev, old_num: rxq, new_num: `0`);
2129	#ifdef CONFIG_SYSFS
2130	kset_unregister(kset: dev->queues_kset);
2131	#endif
2132	return error;
2133	}
2134
2135	static int queue_change_owner(struct net_device *ndev, kuid_t kuid, kgid_t kgid)
2136	{
2137	int error = `0`, real_rx = `0`, real_tx = `0`;
2138
2139	#ifdef CONFIG_SYSFS
2140	if (ndev->queues_kset) {
2141	error = sysfs_change_owner(kobj: &ndev->queues_kset->kobj, kuid, kgid);
2142	if (error)
2143	return error;
2144	}
2145	real_rx = ndev->real_num_rx_queues;
2146	#endif
2147	real_tx = ndev->real_num_tx_queues;
2148
2149	error = net_rx_queue_change_owner(dev: ndev, num: real_rx, kuid, kgid);
2150	if (error)
2151	return error;
2152
2153	error = net_tx_queue_change_owner(dev: ndev, num: real_tx, kuid, kgid);
2154	if (error)
2155	return error;
2156
2157	return `0`;
2158	}
2159
2160	static void remove_queue_kobjects(struct net_device *dev)
2161	{
2162	int real_rx = `0`, real_tx = `0`;
2163
2164	#ifdef CONFIG_SYSFS
2165	real_rx = dev->real_num_rx_queues;
2166	#endif
2167	real_tx = dev->real_num_tx_queues;
2168
2169	net_rx_queue_update_kobjects(dev, old_num: real_rx, new_num: `0`);
2170	netdev_queue_update_kobjects(dev, old_num: real_tx, new_num: `0`);
2171
2172	netdev_lock_ops(dev);
2173	dev->real_num_rx_queues = `0`;
2174	dev->real_num_tx_queues = `0`;
2175	netdev_unlock_ops(dev);
2176	#ifdef CONFIG_SYSFS
2177	kset_unregister(kset: dev->queues_kset);
2178	#endif
2179	}
2180
2181	static bool net_current_may_mount(void)
2182	{
2183	struct net *net = current->nsproxy->net_ns;
2184
2185	return ns_capable(ns: net->user_ns, CAP_SYS_ADMIN);
2186	}
2187
2188	static void net_grab_current_ns(void*)
2189	{
2190	struct net *ns = current->nsproxy->net_ns;
2191	#ifdef CONFIG_NET_NS
2192	if (ns)
2193	refcount_inc(r: &ns->passive);
2194	#endif
2195	return ns;
2196	}
2197
2198	static const void net_initial_ns(void*)
2199	{
2200	return &init_net;
2201	}
2202
/* kobj_ns_type_operations callback: the net namespace of socket @sk. */
static const void *net_netlink_ns(struct sock *sk)
{
	return sock_net(sk);
}
2207
2208	const struct kobj_ns_type_operations net_ns_type_operations = {
2209	.type = KOBJ_NS_TYPE_NET,
2210	.current_may_mount = net_current_may_mount,
2211	.grab_current_ns = net_grab_current_ns,
2212	.netlink_ns = net_netlink_ns,
2213	.initial_ns = net_initial_ns,
2214	.drop_ns = net_drop_ns,
2215	};
2216	EXPORT_SYMBOL_GPL(net_ns_type_operations);
2217
2218	static int netdev_uevent(const struct device d, struct* kobj_uevent_env *env)
2219	{
2220	const struct net_device *dev = to_net_dev(d);
2221	int retval;
2222
2223	/ pass interface to uevent. /
2224	retval = add_uevent_var(env, format: "INTERFACE=%s", dev->name);
2225	if (retval)
2226	goto exit;
2227
2228	/ pass ifindex to uevent.*
2229	* ifindex is useful as it won't change (interface name may change)
2230	* and is what RtNetlink uses natively.
2231	*/
2232	retval = add_uevent_var(env, format: "IFINDEX=%d", dev->ifindex);
2233
2234	exit:
2235	return retval;
2236	}
2237
2238	/*
2239	* netdev_release -- destroy and free a dead device.
2240	* Called when last reference to device kobject is gone.
2241	*/
2242	static void netdev_release(struct device *d)
2243	{
2244	struct net_device *dev = to_net_dev(d);
2245
2246	BUG_ON(dev->reg_state != NETREG_RELEASED);
2247
2248	/ no need to wait for rcu grace period:*
2249	* device is dead and about to be freed.
2250	*/
2251	kfree(rcu_access_pointer(dev->ifalias));
2252	kvfree(addr: dev);
2253	}
2254
/* net_namespace - return the network namespace of the net_device behind @d. */
static const void *net_namespace(const struct device *d)
{
	const struct net_device *dev = to_net_dev(d);

	return dev_net(dev);
}
2261
2262	static void net_get_ownership(const struct device d, kuid_t uid, kgid_t *gid)
2263	{
2264	const struct net_device *dev = to_net_dev(d);
2265	const struct net *net = dev_net(dev);
2266
2267	net_ns_get_ownership(net, uid, gid);
2268	}
2269
2270	static const struct class net_class = {
2271	.name = "net",
2272	.dev_release = netdev_release,
2273	.dev_groups = net_class_groups,
2274	.dev_uevent = netdev_uevent,
2275	.ns_type = &net_ns_type_operations,
2276	.namespace = net_namespace,
2277	.get_ownership = net_get_ownership,
2278	};
2279
2280	#ifdef CONFIG_OF
2281	static int of_dev_node_match(struct device dev, const* void *data)
2282	{
2283	for (; dev; dev = dev->parent) {
2284	if (dev->of_node == data)
2285	return `1`;
2286	}
2287
2288	return `0`;
2289	}
2290
2291	/*
2292	* of_find_net_device_by_node - lookup the net device for the device node
2293	* @np: OF device node
2294	*
2295	* Looks up the net_device structure corresponding with the device node.
2296	* If successful, returns a pointer to the net_device with the embedded
2297	* struct device refcount incremented by one, or NULL on failure. The
2298	* refcount must be dropped when done with the net_device.
2299	*/
2300	struct net_device of_find_net_device_by_node(struct* device_node *np)
2301	{
2302	struct device *dev;
2303
2304	dev = class_find_device(&net_class, NULL, np, of_dev_node_match);
2305	if (!dev)
2306	return NULL;
2307
2308	return to_net_dev(dev);
2309	}
2310	EXPORT_SYMBOL(of_find_net_device_by_node);
2311	#endif
2312
2313	/ Delete sysfs entries but hold kobject reference until after all*
2314	* netdev references are gone.
2315	*/
2316	void netdev_unregister_kobject(struct net_device *ndev)
2317	{
2318	struct device *dev = &ndev->dev;
2319
2320	if (!check_net(net: dev_net(dev: ndev)))
2321	dev_set_uevent_suppress(dev, val: `1`);
2322
2323	kobject_get(kobj: &dev->kobj);
2324
2325	remove_queue_kobjects(dev: ndev);
2326
2327	pm_runtime_set_memalloc_noio(dev, enable: false);
2328
2329	device_del(dev);
2330	}
2331
2332	/ Create sysfs entries for network device. /
2333	int netdev_register_kobject(struct net_device *ndev)
2334	{
2335	struct device *dev = &ndev->dev;
2336	const struct attribute_group **groups = ndev->sysfs_groups;
2337	int error = `0`;
2338
2339	device_initialize(dev);
2340	dev->class = &net_class;
2341	dev->platform_data = ndev;
2342	dev->groups = groups;
2343
2344	dev_set_name(dev, name: "%s", ndev->name);
2345
2346	#ifdef CONFIG_SYSFS
2347	/ Allow for a device specific group /
2348	if (*groups)
2349	groups++;
2350
2351	*groups++ = &netstat_group;
2352	*groups++ = &netdev_phys_group;
2353
2354	if (wireless_group_needed(ndev))
2355	*groups++ = &wireless_group;
2356	#endif /* CONFIG_SYSFS */
2357
2358	error = device_add(dev);
2359	if (error)
2360	return error;
2361
2362	error = register_queue_kobjects(dev: ndev);
2363	if (error) {
2364	device_del(dev);
2365	return error;
2366	}
2367
2368	pm_runtime_set_memalloc_noio(dev, enable: true);
2369
2370	return error;
2371	}
2372
2373	/ Change owner for sysfs entries when moving network devices across network*
2374	* namespaces owned by different user namespaces.
2375	*/
2376	int netdev_change_owner(struct net_device ndev, const* struct net *net_old,
2377	const struct net *net_new)
2378	{
2379	kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
2380	kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
2381	struct device *dev = &ndev->dev;
2382	int error;
2383
2384	net_ns_get_ownership(net: net_old, uid: &old_uid, gid: &old_gid);
2385	net_ns_get_ownership(net: net_new, uid: &new_uid, gid: &new_gid);
2386
2387	/ The network namespace was changed but the owning user namespace is*
2388	* identical so there's no need to change the owner of sysfs entries.
2389	*/
2390	if (uid_eq(left: old_uid, right: new_uid) && gid_eq(left: old_gid, right: new_gid))
2391	return `0`;
2392
2393	error = device_change_owner(dev, kuid: new_uid, kgid: new_gid);
2394	if (error)
2395	return error;
2396
2397	error = queue_change_owner(ndev, kuid: new_uid, kgid: new_gid);
2398	if (error)
2399	return error;
2400
2401	return `0`;
2402	}
2403
2404	int netdev_class_create_file_ns(const struct class_attribute *class_attr,
2405	const void *ns)
2406	{
2407	return class_create_file_ns(class: &net_class, attr: class_attr, ns);
2408	}
2409	EXPORT_SYMBOL(netdev_class_create_file_ns);
2410
2411	void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
2412	const void *ns)
2413	{
2414	class_remove_file_ns(class: &net_class, attr: class_attr, ns);
2415	}
2416	EXPORT_SYMBOL(netdev_class_remove_file_ns);
2417
2418	int __init netdev_kobject_init(void)
2419	{
2420	kobj_ns_type_register(ops: &net_ns_type_operations);
2421	return class_register(class: &net_class);
2422	}
2423

Browse the source code of Linux/net/core/net-sysfs.c