| 1 | // SPDX-License-Identifier: GPL-2.0+ | 
|---|
| 2 | /* | 
|---|
| 3 | * PCIe bandwidth controller | 
|---|
| 4 | * | 
|---|
| 5 | * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com> | 
|---|
| 6 | * | 
|---|
| 7 | * Copyright (C) 2019 Dell Inc | 
|---|
| 8 | * Copyright (C) 2023-2024 Intel Corporation | 
|---|
| 9 | * | 
|---|
| 10 | * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds | 
|---|
| 11 | * and notify the operating system when the Link Width or Speed changes. The | 
|---|
| 12 | * notification capability is required for all Root Ports and Downstream | 
|---|
| 13 | * Ports supporting Link Width wider than x1 and/or multiple Link Speeds. | 
|---|
| 14 | * | 
|---|
| 15 | * This service port driver hooks into the Bandwidth Notification interrupt | 
|---|
| 16 | * watching for changes or links becoming degraded in operation. It updates | 
|---|
| 17 | * the cached Current Link Speed that is exposed to user space through sysfs. | 
|---|
| 18 | */ | 
|---|
| 19 |  | 
|---|
| 20 | #define dev_fmt(fmt) "bwctrl: " fmt | 
|---|
| 21 |  | 
|---|
| 22 | #include <linux/atomic.h> | 
|---|
| 23 | #include <linux/bitops.h> | 
|---|
| 24 | #include <linux/bits.h> | 
|---|
| 25 | #include <linux/cleanup.h> | 
|---|
| 26 | #include <linux/errno.h> | 
|---|
| 27 | #include <linux/interrupt.h> | 
|---|
| 28 | #include <linux/mutex.h> | 
|---|
| 29 | #include <linux/pci.h> | 
|---|
| 30 | #include <linux/pci-bwctrl.h> | 
|---|
| 31 | #include <linux/rwsem.h> | 
|---|
| 32 | #include <linux/slab.h> | 
|---|
| 33 | #include <linux/types.h> | 
|---|
| 34 |  | 
|---|
| 35 | #include "../pci.h" | 
|---|
| 36 | #include "portdrv.h" | 
|---|
| 37 |  | 
|---|
| 38 | /** | 
|---|
| 39 | * struct pcie_bwctrl_data - PCIe bandwidth controller | 
|---|
| 40 | * @set_speed_mutex:	Serializes link speed changes | 
|---|
| 41 | * @cdev:		Thermal cooling device associated with the port | 
|---|
| 42 | */ | 
|---|
| 43 | struct pcie_bwctrl_data { | 
|---|
| 44 | struct mutex set_speed_mutex; | 
|---|
| 45 | struct thermal_cooling_device *cdev; | 
|---|
| 46 | }; | 
|---|
| 47 |  | 
|---|
/*
 * Prevent port removal during Link Speed changes: speed changes take this
 * for reading, probe and remove take it for writing while they set or clear
 * port->link_bwctrl.
 */
static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem);
|---|
| 50 |  | 
|---|
| 51 | static bool pcie_valid_speed(enum pci_bus_speed speed) | 
|---|
| 52 | { | 
|---|
| 53 | return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT); | 
|---|
| 54 | } | 
|---|
| 55 |  | 
|---|
| 56 | static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed) | 
|---|
| 57 | { | 
|---|
| 58 | static const u8 speed_conv[] = { | 
|---|
| 59 | [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT, | 
|---|
| 60 | [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT, | 
|---|
| 61 | [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT, | 
|---|
| 62 | [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT, | 
|---|
| 63 | [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT, | 
|---|
| 64 | [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT, | 
|---|
| 65 | }; | 
|---|
| 66 |  | 
|---|
| 67 | if (WARN_ON_ONCE(!pcie_valid_speed(speed))) | 
|---|
| 68 | return 0; | 
|---|
| 69 |  | 
|---|
| 70 | return speed_conv[speed]; | 
|---|
| 71 | } | 
|---|
| 72 |  | 
|---|
| 73 | static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds) | 
|---|
| 74 | { | 
|---|
| 75 | return __fls(word: supported_speeds); | 
|---|
| 76 | } | 
|---|
| 77 |  | 
|---|
| 78 | /** | 
|---|
| 79 | * pcie_bwctrl_select_speed - Select Target Link Speed | 
|---|
| 80 | * @port:	PCIe Port | 
|---|
| 81 | * @speed_req:	Requested PCIe Link Speed | 
|---|
| 82 | * | 
|---|
| 83 | * Select Target Link Speed by take into account Supported Link Speeds of | 
|---|
| 84 | * both the Root Port and the Endpoint. | 
|---|
| 85 | * | 
|---|
| 86 | * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.) | 
|---|
| 87 | */ | 
|---|
| 88 | static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req) | 
|---|
| 89 | { | 
|---|
| 90 | struct pci_bus *bus = port->subordinate; | 
|---|
| 91 | u8 desired_speeds, supported_speeds; | 
|---|
| 92 | struct pci_dev *dev; | 
|---|
| 93 |  | 
|---|
| 94 | desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req), | 
|---|
| 95 | __fls(PCI_EXP_LNKCAP2_SLS_2_5GB)); | 
|---|
| 96 |  | 
|---|
| 97 | supported_speeds = port->supported_speeds; | 
|---|
| 98 | if (bus) { | 
|---|
| 99 | down_read(sem: &pci_bus_sem); | 
|---|
| 100 | dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list); | 
|---|
| 101 | if (dev) | 
|---|
| 102 | supported_speeds &= dev->supported_speeds; | 
|---|
| 103 | up_read(sem: &pci_bus_sem); | 
|---|
| 104 | } | 
|---|
| 105 | if (!supported_speeds) | 
|---|
| 106 | supported_speeds = PCI_EXP_LNKCAP2_SLS_2_5GB; | 
|---|
| 107 |  | 
|---|
| 108 | return pcie_supported_speeds2target_speed(supported_speeds: supported_speeds & desired_speeds); | 
|---|
| 109 | } | 
|---|
| 110 |  | 
|---|
| 111 | static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt) | 
|---|
| 112 | { | 
|---|
| 113 | int ret; | 
|---|
| 114 |  | 
|---|
| 115 | ret = pcie_capability_clear_and_set_word(dev: port, PCI_EXP_LNKCTL2, | 
|---|
| 116 | PCI_EXP_LNKCTL2_TLS, set: target_speed); | 
|---|
| 117 | if (ret != PCIBIOS_SUCCESSFUL) | 
|---|
| 118 | return pcibios_err_to_errno(err: ret); | 
|---|
| 119 |  | 
|---|
| 120 | return pcie_retrain_link(pdev: port, use_lt); | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|
| 123 | /** | 
|---|
| 124 | * pcie_set_target_speed - Set downstream Link Speed for PCIe Port | 
|---|
| 125 | * @port:	PCIe Port | 
|---|
| 126 | * @speed_req:	Requested PCIe Link Speed | 
|---|
| 127 | * @use_lt:	Wait for the LT or DLLLA bit to detect the end of link training | 
|---|
| 128 | * | 
|---|
| 129 | * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be | 
|---|
| 130 | * adjusted downwards to the best speed supported by both the Port and PCIe | 
|---|
| 131 | * Device underneath it. | 
|---|
| 132 | * | 
|---|
| 133 | * Return: | 
|---|
| 134 | * * 0		- on success | 
|---|
| 135 | * * -EINVAL	- @speed_req is not a PCIe Link Speed | 
|---|
| 136 | * * -ENODEV	- @port is not controllable | 
|---|
| 137 | * * -ETIMEDOUT	- changing Link Speed took too long | 
|---|
| 138 | * * -EAGAIN	- Link Speed was changed but @speed_req was not achieved | 
|---|
| 139 | */ | 
|---|
| 140 | int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, | 
|---|
| 141 | bool use_lt) | 
|---|
| 142 | { | 
|---|
| 143 | struct pci_bus *bus = port->subordinate; | 
|---|
| 144 | u16 target_speed; | 
|---|
| 145 | int ret; | 
|---|
| 146 |  | 
|---|
| 147 | if (WARN_ON_ONCE(!pcie_valid_speed(speed_req))) | 
|---|
| 148 | return -EINVAL; | 
|---|
| 149 |  | 
|---|
| 150 | if (bus && bus->cur_bus_speed == speed_req) | 
|---|
| 151 | return 0; | 
|---|
| 152 |  | 
|---|
| 153 | target_speed = pcie_bwctrl_select_speed(port, speed_req); | 
|---|
| 154 |  | 
|---|
| 155 | scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) { | 
|---|
| 156 | struct pcie_bwctrl_data *data = port->link_bwctrl; | 
|---|
| 157 |  | 
|---|
| 158 | /* | 
|---|
| 159 | * port->link_bwctrl is NULL during initial scan when called | 
|---|
| 160 | * e.g. from the Target Speed quirk. | 
|---|
| 161 | */ | 
|---|
| 162 | if (data) | 
|---|
| 163 | mutex_lock(lock: &data->set_speed_mutex); | 
|---|
| 164 |  | 
|---|
| 165 | ret = pcie_bwctrl_change_speed(port, target_speed, use_lt); | 
|---|
| 166 |  | 
|---|
| 167 | if (data) | 
|---|
| 168 | mutex_unlock(lock: &data->set_speed_mutex); | 
|---|
| 169 | } | 
|---|
| 170 |  | 
|---|
| 171 | /* | 
|---|
| 172 | * Despite setting higher speed into the Target Link Speed, empty | 
|---|
| 173 | * bus won't train to 5GT+ speeds. | 
|---|
| 174 | */ | 
|---|
| 175 | if (!ret && bus && bus->cur_bus_speed != speed_req && | 
|---|
| 176 | !list_empty(head: &bus->devices)) | 
|---|
| 177 | ret = -EAGAIN; | 
|---|
| 178 |  | 
|---|
| 179 | return ret; | 
|---|
| 180 | } | 
|---|
| 181 |  | 
|---|
| 182 | static void pcie_bwnotif_enable(struct pcie_device *srv) | 
|---|
| 183 | { | 
|---|
| 184 | struct pci_dev *port = srv->port; | 
|---|
| 185 | u16 link_status; | 
|---|
| 186 | int ret; | 
|---|
| 187 |  | 
|---|
| 188 | /* Note if LBMS has been seen so far */ | 
|---|
| 189 | ret = pcie_capability_read_word(dev: port, PCI_EXP_LNKSTA, val: &link_status); | 
|---|
| 190 | if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) | 
|---|
| 191 | set_bit(PCI_LINK_LBMS_SEEN, addr: &port->priv_flags); | 
|---|
| 192 |  | 
|---|
| 193 | pcie_capability_set_word(dev: port, PCI_EXP_LNKCTL, | 
|---|
| 194 | PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); | 
|---|
| 195 | pcie_capability_write_word(dev: port, PCI_EXP_LNKSTA, | 
|---|
| 196 | PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); | 
|---|
| 197 |  | 
|---|
| 198 | /* | 
|---|
| 199 | * Update after enabling notifications & clearing status bits ensures | 
|---|
| 200 | * link speed is up to date. | 
|---|
| 201 | */ | 
|---|
| 202 | pcie_update_link_speed(bus: port->subordinate); | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 | static void pcie_bwnotif_disable(struct pci_dev *port) | 
|---|
| 206 | { | 
|---|
| 207 | pcie_capability_clear_word(dev: port, PCI_EXP_LNKCTL, | 
|---|
| 208 | PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); | 
|---|
| 209 | } | 
|---|
| 210 |  | 
|---|
| 211 | static irqreturn_t pcie_bwnotif_irq(int irq, void *context) | 
|---|
| 212 | { | 
|---|
| 213 | struct pcie_device *srv = context; | 
|---|
| 214 | struct pci_dev *port = srv->port; | 
|---|
| 215 | u16 link_status, events; | 
|---|
| 216 | int ret; | 
|---|
| 217 |  | 
|---|
| 218 | ret = pcie_capability_read_word(dev: port, PCI_EXP_LNKSTA, val: &link_status); | 
|---|
| 219 | if (ret != PCIBIOS_SUCCESSFUL) | 
|---|
| 220 | return IRQ_NONE; | 
|---|
| 221 |  | 
|---|
| 222 | events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); | 
|---|
| 223 | if (!events) | 
|---|
| 224 | return IRQ_NONE; | 
|---|
| 225 |  | 
|---|
| 226 | if (events & PCI_EXP_LNKSTA_LBMS) | 
|---|
| 227 | set_bit(PCI_LINK_LBMS_SEEN, addr: &port->priv_flags); | 
|---|
| 228 |  | 
|---|
| 229 | pcie_capability_write_word(dev: port, PCI_EXP_LNKSTA, val: events); | 
|---|
| 230 |  | 
|---|
| 231 | /* | 
|---|
| 232 | * Interrupts will not be triggered from any further Link Speed | 
|---|
| 233 | * change until LBMS is cleared by the write. Therefore, re-read the | 
|---|
| 234 | * speed (inside pcie_update_link_speed()) after LBMS has been | 
|---|
| 235 | * cleared to avoid missing link speed changes. | 
|---|
| 236 | */ | 
|---|
| 237 | pcie_update_link_speed(bus: port->subordinate); | 
|---|
| 238 |  | 
|---|
| 239 | return IRQ_HANDLED; | 
|---|
| 240 | } | 
|---|
| 241 |  | 
|---|
| 242 | void pcie_reset_lbms(struct pci_dev *port) | 
|---|
| 243 | { | 
|---|
| 244 | clear_bit(PCI_LINK_LBMS_SEEN, addr: &port->priv_flags); | 
|---|
| 245 | pcie_capability_write_word(dev: port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); | 
|---|
| 246 | } | 
|---|
| 247 |  | 
|---|
| 248 | static int pcie_bwnotif_probe(struct pcie_device *srv) | 
|---|
| 249 | { | 
|---|
| 250 | struct pci_dev *port = srv->port; | 
|---|
| 251 | int ret; | 
|---|
| 252 |  | 
|---|
| 253 | /* Can happen if we run out of bus numbers during enumeration. */ | 
|---|
| 254 | if (!port->subordinate) | 
|---|
| 255 | return -ENODEV; | 
|---|
| 256 |  | 
|---|
| 257 | struct pcie_bwctrl_data *data = devm_kzalloc(dev: &srv->device, | 
|---|
| 258 | size: sizeof(*data), GFP_KERNEL); | 
|---|
| 259 | if (!data) | 
|---|
| 260 | return -ENOMEM; | 
|---|
| 261 |  | 
|---|
| 262 | ret = devm_mutex_init(&srv->device, &data->set_speed_mutex); | 
|---|
| 263 | if (ret) | 
|---|
| 264 | return ret; | 
|---|
| 265 |  | 
|---|
| 266 | scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { | 
|---|
| 267 | port->link_bwctrl = data; | 
|---|
| 268 |  | 
|---|
| 269 | ret = request_irq(irq: srv->irq, handler: pcie_bwnotif_irq, | 
|---|
| 270 | IRQF_SHARED, name: "PCIe bwctrl", dev: srv); | 
|---|
| 271 | if (ret) { | 
|---|
| 272 | port->link_bwctrl = NULL; | 
|---|
| 273 | return ret; | 
|---|
| 274 | } | 
|---|
| 275 |  | 
|---|
| 276 | pcie_bwnotif_enable(srv); | 
|---|
| 277 | } | 
|---|
| 278 |  | 
|---|
| 279 | pci_dbg(port, "enabled with IRQ %d\n", srv->irq); | 
|---|
| 280 |  | 
|---|
| 281 | /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */ | 
|---|
| 282 | port->link_bwctrl->cdev = pcie_cooling_device_register(port); | 
|---|
| 283 | if (IS_ERR(ptr: port->link_bwctrl->cdev)) | 
|---|
| 284 | port->link_bwctrl->cdev = NULL; | 
|---|
| 285 |  | 
|---|
| 286 | return 0; | 
|---|
| 287 | } | 
|---|
| 288 |  | 
|---|
| 289 | static void pcie_bwnotif_remove(struct pcie_device *srv) | 
|---|
| 290 | { | 
|---|
| 291 | struct pcie_bwctrl_data *data = srv->port->link_bwctrl; | 
|---|
| 292 |  | 
|---|
| 293 | pcie_cooling_device_unregister(cdev: data->cdev); | 
|---|
| 294 |  | 
|---|
| 295 | scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { | 
|---|
| 296 | pcie_bwnotif_disable(port: srv->port); | 
|---|
| 297 |  | 
|---|
| 298 | free_irq(srv->irq, srv); | 
|---|
| 299 |  | 
|---|
| 300 | srv->port->link_bwctrl = NULL; | 
|---|
| 301 | } | 
|---|
| 302 | } | 
|---|
| 303 |  | 
|---|
| 304 | static int pcie_bwnotif_suspend(struct pcie_device *srv) | 
|---|
| 305 | { | 
|---|
| 306 | pcie_bwnotif_disable(port: srv->port); | 
|---|
| 307 | return 0; | 
|---|
| 308 | } | 
|---|
| 309 |  | 
|---|
/* Resume callback: re-enable bandwidth notifications; always returns 0. */
static int pcie_bwnotif_resume(struct pcie_device *srv)
{
	pcie_bwnotif_enable(srv);

	return 0;
}
|---|
| 315 |  | 
|---|
| 316 | static struct pcie_port_service_driver pcie_bwctrl_driver = { | 
|---|
| 317 | .name		= "pcie_bwctrl", | 
|---|
| 318 | .port_type	= PCIE_ANY_PORT, | 
|---|
| 319 | .service	= PCIE_PORT_SERVICE_BWCTRL, | 
|---|
| 320 | .probe		= pcie_bwnotif_probe, | 
|---|
| 321 | .suspend	= pcie_bwnotif_suspend, | 
|---|
| 322 | .resume		= pcie_bwnotif_resume, | 
|---|
| 323 | .remove		= pcie_bwnotif_remove, | 
|---|
| 324 | }; | 
|---|
| 325 |  | 
|---|
| 326 | int __init pcie_bwctrl_init(void) | 
|---|
| 327 | { | 
|---|
| 328 | return pcie_port_service_register(new: &pcie_bwctrl_driver); | 
|---|
| 329 | } | 
|---|
| 330 |  | 
|---|