// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support of MSI, HPET and DMAR interrupts.
 *
 * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
 *	Moved from arch/x86/kernel/apic/io_apic.c.
 * Jiang Liu <jiang.liu@linux.intel.com>
 *	Convert to hierarchical irqdomain
 */
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/hpet.h>
#include <linux/msi.h>
#include <asm/irqdomain.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/xen/hypervisor.h>

struct irq_domain *x86_pci_msi_default_domain __ro_after_init;

static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg)
{
	struct msi_msg msg[2] = { [1] = { }, };

	__irq_msi_compose_msg(cfg, msg, false);
	irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg);
}

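/*
 * msi_set_affinity() - Migrate a non-remapped MSI to a new vector/CPU.
 *
 * Summary of the flow below: if none of the conditions for a safe
 * direct update hold, the address/data pair is rewritten in two steps,
 * first to the new vector on the current CPU and then to the new
 * vector on the new CPU. A stray interrupt hitting the intermediate
 * state is caught by checking the local APIC's IRR and retriggered.
 */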
static int
msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
{
	struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd);
	struct irq_data *parent = irqd->parent_data;
	unsigned int cpu;
	int ret;

	/* Save the current configuration */
	cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd));
	old_cfg = *cfg;

	/* Allocate a new target vector */
	ret = parent->chip->irq_set_affinity(parent, mask, force);
	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
		return ret;

	/*
	 * For non-maskable and non-remapped MSI interrupts the migration
	 * to a different destination CPU and a different vector has to be
	 * done carefully to handle the possible stray interrupt which can
	 * be caused by the non-atomic update of the address/data pair.
	 *
	 * Direct update is possible when:
	 * - The MSI is maskable (remapped MSI does not use this code path).
	 *   The reservation mode bit is set in this case.
	 * - The new vector is the same as the old vector
	 * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
	 * - The interrupt is not yet started up
	 * - The new destination CPU is the same as the old destination CPU
	 */
	if (!irqd_can_reserve(irqd) ||
	    cfg->vector == old_cfg.vector ||
	    old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
	    !irqd_is_started(irqd) ||
	    cfg->dest_apicid == old_cfg.dest_apicid) {
		irq_msi_update_msg(irqd, cfg);
		return ret;
	}

	/*
	 * Paranoia: Validate that the interrupt target is the local
	 * CPU.
	 */
	if (WARN_ON_ONCE(cpu != smp_processor_id())) {
		irq_msi_update_msg(irqd, cfg);
		return ret;
	}

	/*
	 * Redirect the interrupt to the new vector on the current CPU
	 * first. This might cause a spurious interrupt on this vector if
	 * the device raises an interrupt right between this update and the
	 * update to the final destination CPU.
	 *
	 * If the vector is in use then the installed device handler will
	 * denote it as spurious, which is no harm as this is a rare event
	 * and interrupt handlers have to cope with spurious interrupts
	 * anyway. If the vector is unused, then it is marked so it won't
	 * trigger the 'No irq handler for vector' warning in
	 * common_interrupt().
	 *
	 * This requires holding the vector lock to prevent concurrent
	 * updates to the affected vector.
	 */
	lock_vector_lock();

	/*
	 * Mark the new target vector on the local CPU if it is currently
	 * unused. Reuse the VECTOR_RETRIGGERED state which is also used in
	 * the CPU hotplug path for a similar purpose. This cannot be
	 * undone here as the current CPU has interrupts disabled and
	 * cannot handle the interrupt before the whole set_affinity()
	 * section is done. In the CPU unplug case, the current CPU is
	 * about to vanish and will not handle any interrupts anymore. The
	 * vector is cleaned up when the CPU comes online again.
	 */
	if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector])))
		this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED);

	/* Redirect it to the new vector on the local CPU temporarily */
	old_cfg.vector = cfg->vector;
	irq_msi_update_msg(irqd, &old_cfg);

	/* Now transition it to the target CPU */
	irq_msi_update_msg(irqd, cfg);

	/*
	 * All interrupts after this point are now targeted at the new
	 * vector/CPU.
	 *
	 * Drop the vector lock before testing whether the temporary
	 * assignment to the local CPU was hit by an interrupt raised in
	 * the device, because the retrigger function acquires the vector
	 * lock again.
	 */
	unlock_vector_lock();

	/*
	 * Check whether the transition raced with a device interrupt and
	 * is pending in the local APIC's IRR. It is safe to do this outside
	 * of the vector lock as the irq_desc::lock of this interrupt is
	 * still held and interrupts are disabled: The check is not
	 * accessing the underlying vector store. It's just checking the
	 * local APIC's IRR.
	 */
	if (lapic_vector_set_in_irr(cfg->vector))
		irq_data_get_irq_chip(irqd)->irq_retrigger(irqd);

	return ret;
}

/**
 * pci_dev_has_default_msi_parent_domain - Check whether the device has the
 *					   default MSI parent domain associated
 * @dev:	Pointer to the PCI device
 */
bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev)
{
	struct irq_domain *domain = dev_get_msi_domain(&dev->dev);

	if (!domain)
		domain = dev_get_msi_domain(&dev->bus->dev);
	if (!domain)
		return false;

	return domain == x86_vector_domain;
}

/**
 * x86_msi_prepare - Setup of msi_alloc_info_t for allocations
 * @domain:	The domain for which this setup happens
 * @dev:	The device for which interrupts are allocated
 * @nvec:	The number of vectors to allocate
 * @alloc:	The allocation info structure to initialize
 *
 * This function is to be used for all types of MSI domains above the x86
 * vector domain and any intermediates. It is always invoked from the
 * top level interrupt domain. The domain specific allocation
 * functionality is determined via the @domain's bus token, which allows
 * mapping it to the x86-specific allocation type.
 */
static int x86_msi_prepare(struct irq_domain *domain, struct device *dev,
			   int nvec, msi_alloc_info_t *alloc)
{
	struct msi_domain_info *info = domain->host_data;

	init_irq_alloc_info(alloc, NULL);

	switch (info->bus_token) {
	case DOMAIN_BUS_PCI_DEVICE_MSI:
		alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
		return 0;
	case DOMAIN_BUS_PCI_DEVICE_MSIX:
		alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
		return 0;
	default:
		return -EINVAL;
	}
}

/**
 * x86_init_dev_msi_info - Domain info setup for MSI domains
 * @dev:		The device for which the domain should be created
 * @domain:		The (root) domain providing this callback
 * @real_parent:	The real parent domain of the domain to initialize
 * @info:		The domain info for the domain to initialize
 *
 * This function is to be used for all types of MSI domains above the x86
 * vector domain and any intermediates. The domain specific functionality
 * is determined via the @real_parent.
 */
static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
				  struct irq_domain *real_parent,
				  struct msi_domain_info *info)
{
	const struct msi_parent_ops *pops = real_parent->msi_parent_ops;

	/* MSI parent domain specific settings */
	switch (real_parent->bus_token) {
	case DOMAIN_BUS_ANY:
		/* Only the vector domain can have the ANY token */
		if (WARN_ON_ONCE(domain != real_parent))
			return false;
		info->chip->irq_set_affinity = msi_set_affinity;
		info->chip->flags |= IRQCHIP_MOVE_DEFERRED;
		break;
	case DOMAIN_BUS_DMAR:
	case DOMAIN_BUS_AMDVI:
		break;
	default:
		WARN_ON_ONCE(1);
		return false;
	}

	/* Is the target supported? */
	switch (info->bus_token) {
	case DOMAIN_BUS_PCI_DEVICE_MSI:
	case DOMAIN_BUS_PCI_DEVICE_MSIX:
		break;
	default:
		WARN_ON_ONCE(1);
		return false;
	}

	/*
	 * Mask out the domain specific MSI feature flags which are not
	 * supported by the real parent.
	 */
	info->flags			&= pops->supported_flags;
	/* Enforce the required flags */
	info->flags			|= X86_VECTOR_MSI_FLAGS_REQUIRED;

	/* This is always invoked from the top level MSI domain! */
	info->ops->msi_prepare		= x86_msi_prepare;

	info->chip->irq_ack		= irq_chip_ack_parent;
	info->chip->irq_retrigger	= irq_chip_retrigger_hierarchy;
	info->chip->flags		|= IRQCHIP_SKIP_SET_WAKE |
					   IRQCHIP_AFFINITY_PRE_STARTUP;

	info->handler			= handle_edge_irq;
	info->handler_name		= "edge";

	return true;
}

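/*
 * Parent ops advertised by the x86 vector domain. Per device MSI
 * domains constructed on top of it have their feature flags filtered
 * against ->supported_flags in x86_init_dev_msi_info() above.
 */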
static const struct msi_parent_ops x86_vector_msi_parent_ops = {
	.supported_flags	= X86_VECTOR_MSI_FLAGS_SUPPORTED,
	.init_dev_msi_info	= x86_init_dev_msi_info,
};

struct irq_domain * __init native_create_pci_msi_domain(void)
{
	if (apic_is_disabled)
		return NULL;

	x86_vector_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
	x86_vector_domain->msi_parent_ops = &x86_vector_msi_parent_ops;
	return x86_vector_domain;
}

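/*
 * The indirection through x86_init.irqs.create_pci_msi_domain lets
 * hypervisor specific setup code (e.g. Xen) install its own PCI/MSI
 * parent domain in place of the native one above.
 */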
void __init x86_create_pci_msi_domain(void)
{
	x86_pci_msi_default_domain = x86_init.irqs.create_pci_msi_domain();
}

/* Kept around and exported for Hyper-V (see drivers/pci/controller/pci-hyperv.c) */
int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
		    msi_alloc_info_t *arg)
{
	init_irq_alloc_info(arg, NULL);

	if (to_pci_dev(dev)->msix_enabled)
		arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
	else
		arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
	return 0;
}
EXPORT_SYMBOL_GPL(pci_msi_prepare);

#ifdef CONFIG_DMAR_TABLE
/*
 * The Intel IOMMU (ab)uses the high bits of the MSI address to contain the
 * high bits of the destination APIC ID. This can't be done in the general
 * case for MSIs as it would be targeting real memory above 4GiB, not the
 * APIC.
 */
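/*
 * Sketch of the difference (see __irq_msi_compose_msg() for the
 * authoritative layout): a plain MSI can only carry an 8-bit
 * destination APIC ID in the low address word, while the DMAR variant
 * additionally places APIC ID bits 8-31 into the upper 32 address
 * bits, which only the IOMMU's own interrupt can get away with.
 */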
static void dmar_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
{
	__irq_msi_compose_msg(irqd_cfg(data), msg, true);
}

static void dmar_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
{
	dmar_msi_write(data->irq, msg);
}

static struct irq_chip dmar_msi_controller = {
	.name			= "DMAR-MSI",
	.irq_unmask		= dmar_msi_unmask,
	.irq_mask		= dmar_msi_mask,
	.irq_ack		= irq_chip_ack_parent,
	.irq_set_affinity	= msi_domain_set_affinity,
	.irq_retrigger		= irq_chip_retrigger_hierarchy,
	.irq_compose_msi_msg	= dmar_msi_compose_msg,
	.irq_write_msi_msg	= dmar_msi_write_msg,
	.flags			= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED |
				  IRQCHIP_AFFINITY_PRE_STARTUP,
};

static int dmar_msi_init(struct irq_domain *domain,
			 struct msi_domain_info *info, unsigned int virq,
			 irq_hw_number_t hwirq, msi_alloc_info_t *arg)
{
	irq_domain_set_info(domain, virq, arg->devid, info->chip, NULL,
			    handle_edge_irq, arg->data, "edge");

	return 0;
}

static struct msi_domain_ops dmar_msi_domain_ops = {
	.msi_init	= dmar_msi_init,
};

static struct msi_domain_info dmar_msi_domain_info = {
	.ops		= &dmar_msi_domain_ops,
	.chip		= &dmar_msi_controller,
	.flags		= MSI_FLAG_USE_DEF_DOM_OPS,
};

static struct irq_domain *dmar_get_irq_domain(void)
{
	static struct irq_domain *dmar_domain;
	static DEFINE_MUTEX(dmar_lock);
	struct fwnode_handle *fn;

	mutex_lock(&dmar_lock);
	if (dmar_domain)
		goto out;

	fn = irq_domain_alloc_named_fwnode("DMAR-MSI");
	if (fn) {
		dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info,
						    x86_vector_domain);
		if (!dmar_domain)
			irq_domain_free_fwnode(fn);
	}
out:
	mutex_unlock(&dmar_lock);
	return dmar_domain;
}

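/*
 * Allocate an interrupt in the DMAR-MSI domain, creating the domain on
 * first use. Returns the Linux interrupt number on success, -1 if the
 * domain is not available, or a negative errno from the underlying
 * allocation. Used by the IOMMU code, e.g. for the DMAR fault
 * reporting interrupt.
 */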
int dmar_alloc_hwirq(int id, int node, void *arg)
{
	struct irq_domain *domain = dmar_get_irq_domain();
	struct irq_alloc_info info;

	if (!domain)
		return -1;

	init_irq_alloc_info(&info, NULL);
	info.type = X86_IRQ_ALLOC_TYPE_DMAR;
	info.devid = id;
	info.hwirq = id;
	info.data = arg;

	return irq_domain_alloc_irqs(domain, 1, node, &info);
}

void dmar_free_hwirq(int irq)
{
	irq_domain_free_irqs(irq, 1);
}
#endif

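/*
 * Restore MSI state, e.g. on resume. A true return value tells the
 * PCI/MSI core to skip rewriting the cached MSI messages; that is only
 * the case when running as Xen initial domain. Everywhere else
 * xen_initdom_restore_msi() returns false and the core restores the
 * messages itself.
 */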
bool arch_restore_msi_irqs(struct pci_dev *dev)
{
	return xen_initdom_restore_msi(dev);
}