// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux network device link state notification
 *
 * Author:
 *     Stefan Rompf <sux@loplof.de>
 */

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/types.h>

#include "dev.h"

enum lw_bits {
	LW_URGENT = 0,
};

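/* LW_URGENT is set while an urgent run of the work item has been
 * scheduled but not yet started; linkwatch_nextevent holds the jiffies
 * value before which the next full, rate-limited run may not begin.
 */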
static unsigned long linkwatch_flags;
static unsigned long linkwatch_nextevent;

static void linkwatch_event(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);

static LIST_HEAD(lweventlist);
static DEFINE_SPINLOCK(lweventlist_lock);

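/* Derive the RFC 2863 operational state from the carrier, dormant and
 * testing flags.  When carrier is off on a stacked device (iflink !=
 * ifindex), consult the lower device: if the lower's carrier is up, this
 * device is itself the source of the down state (IF_OPER_DOWN); otherwise
 * report IF_OPER_LOWERLAYERDOWN.
 */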
static unsigned int default_operstate(const struct net_device *dev)
{
	if (netif_testing(dev))
		return IF_OPER_TESTING;

	/* Some uppers (DSA) have additional sources for being down, so
	 * first check whether lower is indeed the source of its down state.
	 */
	if (!netif_carrier_ok(dev)) {
		struct net_device *peer;
		int iflink;

		/* If called from netdev_run_todo()/linkwatch_sync_dev(),
		 * dev_net(dev) can be already freed, and RTNL is not held.
		 */
		if (dev->reg_state <= NETREG_REGISTERED)
			iflink = dev_get_iflink(dev);
		else
			iflink = dev->ifindex;

		if (iflink == dev->ifindex)
			return IF_OPER_DOWN;

		ASSERT_RTNL();
		peer = __dev_get_by_index(dev_net(dev), iflink);
		if (!peer)
			return IF_OPER_DOWN;

		return netif_carrier_ok(peer) ? IF_OPER_DOWN :
						IF_OPER_LOWERLAYERDOWN;
	}

	if (netif_dormant(dev))
		return IF_OPER_DORMANT;

	return IF_OPER_UP;
}

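/* Apply the configured link mode on top of the default operstate
 * (IF_LINK_MODE_TESTING/DORMANT cap an UP result at TESTING/DORMANT) and
 * publish the result to dev->operstate.
 */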
static void rfc2863_policy(struct net_device *dev)
{
	unsigned int operstate = default_operstate(dev);

	if (operstate == READ_ONCE(dev->operstate))
		return;

	switch (dev->link_mode) {
	case IF_LINK_MODE_TESTING:
		if (operstate == IF_OPER_UP)
			operstate = IF_OPER_TESTING;
		break;

	case IF_LINK_MODE_DORMANT:
		if (operstate == IF_OPER_UP)
			operstate = IF_OPER_DORMANT;
		break;
	case IF_LINK_MODE_DEFAULT:
	default:
		break;
	}

	WRITE_ONCE(dev->operstate, operstate);
}

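/* Called from the registration path so that devices whose carrier,
 * dormant or testing state was changed before registration start out
 * with a consistent operstate.
 */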
void linkwatch_init_dev(struct net_device *dev)
{
	/* Handle pre-registration link state changes */
	if (!netif_carrier_ok(dev) || netif_dormant(dev) ||
	    netif_testing(dev))
		rfc2863_policy(dev);
}

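/* An event is urgent (exempt from the one-per-second rate limit) when the
 * device is running and either it is stacked on a lower device (iflink
 * differs from ifindex), it is a LAG port or master, or carrier is up
 * while the qdisc is being changed.
 */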
static bool linkwatch_urgent_event(struct net_device *dev)
{
	if (!netif_running(dev))
		return false;

	if (dev->ifindex != dev_get_iflink(dev))
		return true;

	if (netif_is_lag_port(dev) || netif_is_lag_master(dev))
		return true;

	return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
}

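/* Put the device on the global event list, unless it is already queued,
 * and take a tracked reference so it cannot disappear before the event
 * is processed.
 */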
static void linkwatch_add_event(struct net_device *dev)
{
	unsigned long flags;

	spin_lock_irqsave(&lweventlist_lock, flags);
	if (list_empty(&dev->link_watch_list)) {
		list_add_tail(&dev->link_watch_list, &lweventlist);
		netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC);
	}
	spin_unlock_irqrestore(&lweventlist_lock, flags);
}

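/* Schedule the delayed work that drains lweventlist.  Urgent requests set
 * LW_URGENT and run the work immediately; non-urgent requests honour
 * linkwatch_nextevent and never override an already pending timer.
 */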
static void linkwatch_schedule_work(int urgent)
{
	unsigned long delay = linkwatch_nextevent - jiffies;

	if (test_bit(LW_URGENT, &linkwatch_flags))
		return;

	/* Minimise down-time: drop delay for up event. */
	if (urgent) {
		if (test_and_set_bit(LW_URGENT, &linkwatch_flags))
			return;
		delay = 0;
	}

	/* If we wrap around we'll delay it by at most HZ. */
	if (delay > HZ)
		delay = 0;

	/*
	 * If urgent, schedule immediate execution; otherwise, don't
	 * override the existing timer.
	 */
	if (test_bit(LW_URGENT, &linkwatch_flags))
		mod_delayed_work(system_dfl_wq, &linkwatch_work, 0);
	else
		queue_delayed_work(system_dfl_wq, &linkwatch_work, delay);
}

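/* Handle one device: clear its pending bit, re-evaluate the operstate,
 * activate or deactivate the qdisc according to carrier state (if the
 * device is up), notify about the state change and drop the reference
 * taken when the event was queued.
 */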
static void linkwatch_do_dev(struct net_device *dev)
{
	/*
	 * Make sure the above read is complete since it can be
	 * rewritten as soon as we clear the bit below.
	 */
	smp_mb__before_atomic();

	/* We are about to handle this device,
	 * so new events can be accepted
	 */
	clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);

	rfc2863_policy(dev);
	if (dev->flags & IFF_UP) {
		if (netif_carrier_ok(dev))
			dev_activate(dev);
		else
			dev_deactivate(dev);

		netif_state_change(dev);
	}
	/* Note: our callers are responsible for calling netdev_tracker_free().
	 * This is the reason we use __dev_put() instead of dev_put().
	 */
	__dev_put(dev);
}

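/* Drain the event list, handling at most a fixed budget of devices per
 * invocation.  Devices that are skipped (absent, or non-urgent during an
 * urgent-only pass) or that exceed the budget are put back on lweventlist
 * and the work is rescheduled.
 */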
static void __linkwatch_run_queue(int urgent_only)
{
#define MAX_DO_DEV_PER_LOOP	100

	int do_dev = MAX_DO_DEV_PER_LOOP;
	/* Use a local list here since we add non-urgent
	 * events back to the global one when called with
	 * urgent_only=1.
	 */
	LIST_HEAD(wrk);

	/* Give urgent case more budget */
	if (urgent_only)
		do_dev += MAX_DO_DEV_PER_LOOP;

	/*
	 * Limit the number of linkwatch events to one
	 * per second so that a runaway driver does not
	 * cause a storm of messages on the netlink
	 * socket.  This limit does not apply to up events
	 * while the device qdisc is down.
	 */
	if (!urgent_only)
		linkwatch_nextevent = jiffies + HZ;
	/* Limit wrap-around effect on delay. */
	else if (time_after(linkwatch_nextevent, jiffies + HZ))
		linkwatch_nextevent = jiffies;

	clear_bit(LW_URGENT, &linkwatch_flags);

	spin_lock_irq(&lweventlist_lock);
	list_splice_init(&lweventlist, &wrk);

	while (!list_empty(&wrk) && do_dev > 0) {
		struct net_device *dev;

		dev = list_first_entry(&wrk, struct net_device, link_watch_list);
		list_del_init(&dev->link_watch_list);

		if (!netif_device_present(dev) ||
		    (urgent_only && !linkwatch_urgent_event(dev))) {
			list_add_tail(&dev->link_watch_list, &lweventlist);
			continue;
		}
		/* We must free netdev tracker under
		 * the spinlock protection.
		 */
		netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
		spin_unlock_irq(&lweventlist_lock);
		netdev_lock_ops(dev);
		linkwatch_do_dev(dev);
		netdev_unlock_ops(dev);
		do_dev--;
		spin_lock_irq(&lweventlist_lock);
	}

	/* Add the remaining work back to lweventlist */
	list_splice_init(&wrk, &lweventlist);

	if (!list_empty(&lweventlist))
		linkwatch_schedule_work(0);
	spin_unlock_irq(&lweventlist_lock);
}

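/* Remove the device from the pending event list, if queued, and release
 * its tracker entry.  Returns true when there was a pending event; the
 * caller then completes it via linkwatch_do_dev(), which also drops the
 * reference.
 */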
static bool linkwatch_clean_dev(struct net_device *dev)
{
	unsigned long flags;
	bool clean = false;

	spin_lock_irqsave(&lweventlist_lock, flags);
	if (!list_empty(&dev->link_watch_list)) {
		list_del_init(&dev->link_watch_list);
		clean = true;
		/* We must release netdev tracker under
		 * the spinlock protection.
		 */
		netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
	}
	spin_unlock_irqrestore(&lweventlist_lock, flags);

	return clean;
}

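/* Flush a pending linkwatch event for one device synchronously instead of
 * waiting for the workqueue.  __linkwatch_sync_dev() expects the caller to
 * hold the per-device ops lock; linkwatch_sync_dev() takes it itself.
 */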
void __linkwatch_sync_dev(struct net_device *dev)
{
	netdev_ops_assert_locked(dev);

	if (linkwatch_clean_dev(dev))
		linkwatch_do_dev(dev);
}

void linkwatch_sync_dev(struct net_device *dev)
{
	if (linkwatch_clean_dev(dev)) {
		netdev_lock_ops(dev);
		linkwatch_do_dev(dev);
		netdev_unlock_ops(dev);
	}
}

/* Must be called with the rtnl semaphore held */
void linkwatch_run_queue(void)
{
	__linkwatch_run_queue(0);
}

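/* Delayed work handler.  When it runs before linkwatch_nextevent has
 * passed it processes urgent events only, leaving the rest queued for
 * the next full, rate-limited run.
 */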
static void linkwatch_event(struct work_struct *dummy)
{
	rtnl_lock();
	__linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies));
	rtnl_unlock();
}

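/* Entry point for drivers and the networking core, typically reached via
 * netif_carrier_on()/netif_carrier_off().  The device is queued at most
 * once (guarded by __LINK_STATE_LINKWATCH_PENDING) and the worker is
 * scheduled, immediately for urgent events.
 */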
void linkwatch_fire_event(struct net_device *dev)
{
	bool urgent = linkwatch_urgent_event(dev);

	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
		linkwatch_add_event(dev);
	} else if (!urgent)
		return;

	linkwatch_schedule_work(urgent);
}
EXPORT_SYMBOL(linkwatch_fire_event);