| 1 | // SPDX-License-Identifier: GPL-2.0-or-later | 
|---|
| 2 | /* | 
|---|
| 3 | *  acpi_numa.c - ACPI NUMA support | 
|---|
| 4 | * | 
|---|
| 5 | *  Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com> | 
|---|
| 6 | */ | 
|---|
| 7 |  | 
|---|
| 8 | #define pr_fmt(fmt) "ACPI: " fmt | 
|---|
| 9 |  | 
|---|
| 10 | #include <linux/module.h> | 
|---|
| 11 | #include <linux/init.h> | 
|---|
| 12 | #include <linux/kernel.h> | 
|---|
| 13 | #include <linux/types.h> | 
|---|
| 14 | #include <linux/errno.h> | 
|---|
| 15 | #include <linux/acpi.h> | 
|---|
| 16 | #include <linux/memblock.h> | 
|---|
| 17 | #include <linux/memory.h> | 
|---|
| 18 | #include <linux/numa.h> | 
|---|
| 19 | #include <linux/nodemask.h> | 
|---|
| 20 | #include <linux/topology.h> | 
|---|
| 21 | #include <linux/numa_memblks.h> | 
|---|
| 22 | #include <linux/string_choices.h> | 
|---|
| 23 |  | 
|---|
| 24 | static nodemask_t nodes_found_map = NODE_MASK_NONE; | 
|---|
| 25 |  | 
|---|
| 26 | /* maps to convert between proximity domain and logical node ID */ | 
|---|
| 27 | static int pxm_to_node_map[MAX_PXM_DOMAINS] | 
|---|
| 28 | = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE }; | 
|---|
| 29 | static int node_to_pxm_map[MAX_NUMNODES] | 
|---|
| 30 | = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL }; | 
|---|
| 31 |  | 
|---|
| 32 | unsigned char acpi_srat_revision __initdata; | 
|---|
| 33 | static int acpi_numa __initdata; | 
|---|
| 34 |  | 
|---|
| 35 | static int last_real_pxm; | 
|---|
| 36 |  | 
|---|
| 37 | void __init disable_srat(void) | 
|---|
| 38 | { | 
|---|
| 39 | acpi_numa = -1; | 
|---|
| 40 | } | 
|---|
| 41 |  | 
|---|
| 42 | int pxm_to_node(int pxm) | 
|---|
| 43 | { | 
|---|
| 44 | if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off) | 
|---|
| 45 | return NUMA_NO_NODE; | 
|---|
| 46 | return pxm_to_node_map[pxm]; | 
|---|
| 47 | } | 
|---|
| 48 | EXPORT_SYMBOL(pxm_to_node); | 
|---|
| 49 |  | 
|---|
| 50 | int node_to_pxm(int node) | 
|---|
| 51 | { | 
|---|
| 52 | if (node < 0) | 
|---|
| 53 | return PXM_INVAL; | 
|---|
| 54 | return node_to_pxm_map[node]; | 
|---|
| 55 | } | 
|---|
| 56 | EXPORT_SYMBOL_GPL(node_to_pxm); | 
|---|
| 57 |  | 
|---|
| 58 | static void __acpi_map_pxm_to_node(int pxm, int node) | 
|---|
| 59 | { | 
|---|
| 60 | if (pxm_to_node_map[pxm] == NUMA_NO_NODE || node < pxm_to_node_map[pxm]) | 
|---|
| 61 | pxm_to_node_map[pxm] = node; | 
|---|
| 62 | if (node_to_pxm_map[node] == PXM_INVAL || pxm < node_to_pxm_map[node]) | 
|---|
| 63 | node_to_pxm_map[node] = pxm; | 
|---|
| 64 | } | 
|---|
| 65 |  | 
|---|
| 66 | int acpi_map_pxm_to_node(int pxm) | 
|---|
| 67 | { | 
|---|
| 68 | int node; | 
|---|
| 69 |  | 
|---|
| 70 | if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off) | 
|---|
| 71 | return NUMA_NO_NODE; | 
|---|
| 72 |  | 
|---|
| 73 | node = pxm_to_node_map[pxm]; | 
|---|
| 74 |  | 
|---|
| 75 | if (node == NUMA_NO_NODE) { | 
|---|
| 76 | node = first_unset_node(nodes_found_map); | 
|---|
| 77 | if (node >= MAX_NUMNODES) | 
|---|
| 78 | return NUMA_NO_NODE; | 
|---|
| 79 | __acpi_map_pxm_to_node(pxm, node); | 
|---|
| 80 | node_set(node, nodes_found_map); | 
|---|
| 81 | } | 
|---|
| 82 |  | 
|---|
| 83 | return node; | 
|---|
| 84 | } | 
|---|
| 85 | EXPORT_SYMBOL(acpi_map_pxm_to_node); | 
|---|
| 86 |  | 
|---|
#ifdef CONFIG_NUMA_EMU
/*
 * Take max_nid - 1 fake-numa nodes into account in both
 * pxm_to_node_map[]/node_to_pxm_map[] tables.
 *
 * The new tables are assembled in __initdata scratch copies:
 *  1. every fake node j in [0, max_nid] inherits the PXM of the physical
 *     node named by emu_nid_to_phys[j];
 *  2. physical nodes that back no fake node are placed in the first free
 *     slot and remembered in a nodemask;
 *  3. the reverse (PXM -> node) table is derived from the result;
 *  4. both live tables are overwritten and the remembered nodes are OR-ed
 *     into numa_nodes_parsed.
 *
 * Returns -1 when NUMA is off or a consistency check fires, 0 otherwise
 * (including the cases where there is nothing to do).
 */
int __init fix_pxm_node_maps(int max_nid)
{
	static int pxm_to_node_map_copy[MAX_PXM_DOMAINS] __initdata
			= { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE };
	static int node_to_pxm_map_copy[MAX_NUMNODES] __initdata
			= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
	int i, j, index = -1, count = 0;
	nodemask_t nodes_to_enable;

	if (numa_off)
		return -1;

	/* no or incomplete node/PXM mapping set, nothing to do */
	if (srat_disabled())
		return 0;

	/* find fake nodes PXM mapping */
	for (i = 0; i < MAX_NUMNODES; i++) {
		if (node_to_pxm_map[i] == PXM_INVAL)
			continue;

		for (j = 0; j <= max_nid; j++) {
			if (emu_nid_to_phys[j] != i)
				continue;

			if (WARN(node_to_pxm_map_copy[j] != PXM_INVAL,
				 "Node %d is already binded to PXM %d\n",
				 j, node_to_pxm_map_copy[j]))
				return -1;

			node_to_pxm_map_copy[j] = node_to_pxm_map[i];
			/* track the highest fake node that received a PXM */
			if (j > index)
				index = j;
			count++;
		}
	}
	if (index == -1) {
		pr_debug("No node/PXM mapping has been set\n");
		/* nothing more to be done */
		return 0;
	}
	if (WARN(index != max_nid, "%d max nid  when expected %d\n",
		 index, max_nid))
		return -1;

	nodes_clear(nodes_to_enable);

	/* map phys nodes not used for fake nodes */
	for (i = 0; i < MAX_NUMNODES; i++) {
		if (node_to_pxm_map[i] == PXM_INVAL)
			continue;

		for (j = 0; j <= max_nid; j++)
			if (emu_nid_to_phys[j] == i)
				break;
		/* fake nodes PXM mapping has been done */
		if (j <= max_nid)
			continue;

		/* find first hole */
		for (j = 0;
		     j < MAX_NUMNODES &&
		     node_to_pxm_map_copy[j] != PXM_INVAL;
		     j++)
			;
		if (WARN(j == MAX_NUMNODES,
			 "Number of nodes exceeds MAX_NUMNODES\n"))
			return -1;
		node_to_pxm_map_copy[j] = node_to_pxm_map[i];
		node_set(j, nodes_to_enable);
		count++;
	}

	/* creating reverse mapping in pxm_to_node_map[] */
	for (i = 0; i < MAX_NUMNODES; i++)
		if (node_to_pxm_map_copy[i] != PXM_INVAL &&
		    pxm_to_node_map_copy[node_to_pxm_map_copy[i]] == NUMA_NO_NODE)
			pxm_to_node_map_copy[node_to_pxm_map_copy[i]] = i;

	/*
	 * Overwrite with new mapping.  The two tables are sized differently
	 * (MAX_NUMNODES vs MAX_PXM_DOMAINS), so each one is copied with its
	 * own bound; a shared MAX_NUMNODES bound would leave part of
	 * pxm_to_node_map[] stale or overrun it whenever the sizes differ.
	 */
	for (i = 0; i < MAX_NUMNODES; i++)
		node_to_pxm_map[i] = node_to_pxm_map_copy[i];
	for (i = 0; i < MAX_PXM_DOMAINS; i++)
		pxm_to_node_map[i] = pxm_to_node_map_copy[i];

	/* enable other nodes found in PXM for hotplug */
	nodes_or(numa_nodes_parsed, nodes_to_enable, numa_nodes_parsed);

	pr_debug("found %d total number of nodes\n", count);
	return 0;
}
#endif
| 181 |  | 
|---|
| 182 | static void __init | 
|---|
| 183 | acpi_table_print_srat_entry(struct acpi_subtable_header *) | 
|---|
| 184 | { | 
|---|
| 185 | switch (header->type) { | 
|---|
| 186 | case ACPI_SRAT_TYPE_CPU_AFFINITY: | 
|---|
| 187 | { | 
|---|
| 188 | struct acpi_srat_cpu_affinity *p = | 
|---|
| 189 | (struct acpi_srat_cpu_affinity *)header; | 
|---|
| 190 | pr_debug( "SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n", | 
|---|
| 191 | p->apic_id, p->local_sapic_eid, | 
|---|
| 192 | p->proximity_domain_lo, | 
|---|
| 193 | str_enabled_disabled(p->flags & ACPI_SRAT_CPU_ENABLED)); | 
|---|
| 194 | } | 
|---|
| 195 | break; | 
|---|
| 196 |  | 
|---|
| 197 | case ACPI_SRAT_TYPE_MEMORY_AFFINITY: | 
|---|
| 198 | { | 
|---|
| 199 | struct acpi_srat_mem_affinity *p = | 
|---|
| 200 | (struct acpi_srat_mem_affinity *)header; | 
|---|
| 201 | pr_debug( "SRAT Memory (0x%llx length 0x%llx) in proximity domain %d %s%s%s\n", | 
|---|
| 202 | (unsigned long long)p->base_address, | 
|---|
| 203 | (unsigned long long)p->length, | 
|---|
| 204 | p->proximity_domain, | 
|---|
| 205 | str_enabled_disabled(p->flags & ACPI_SRAT_MEM_ENABLED), | 
|---|
| 206 | (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? | 
|---|
| 207 | " hot-pluggable": "", | 
|---|
| 208 | (p->flags & ACPI_SRAT_MEM_NON_VOLATILE) ? | 
|---|
| 209 | " non-volatile": ""); | 
|---|
| 210 | } | 
|---|
| 211 | break; | 
|---|
| 212 |  | 
|---|
| 213 | case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: | 
|---|
| 214 | { | 
|---|
| 215 | struct acpi_srat_x2apic_cpu_affinity *p = | 
|---|
| 216 | (struct acpi_srat_x2apic_cpu_affinity *)header; | 
|---|
| 217 | pr_debug( "SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n", | 
|---|
| 218 | p->apic_id, | 
|---|
| 219 | p->proximity_domain, | 
|---|
| 220 | str_enabled_disabled(p->flags & ACPI_SRAT_CPU_ENABLED)); | 
|---|
| 221 | } | 
|---|
| 222 | break; | 
|---|
| 223 |  | 
|---|
| 224 | case ACPI_SRAT_TYPE_GICC_AFFINITY: | 
|---|
| 225 | { | 
|---|
| 226 | struct acpi_srat_gicc_affinity *p = | 
|---|
| 227 | (struct acpi_srat_gicc_affinity *)header; | 
|---|
| 228 | pr_debug( "SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n", | 
|---|
| 229 | p->acpi_processor_uid, | 
|---|
| 230 | p->proximity_domain, | 
|---|
| 231 | str_enabled_disabled(p->flags & ACPI_SRAT_GICC_ENABLED)); | 
|---|
| 232 | } | 
|---|
| 233 | break; | 
|---|
| 234 |  | 
|---|
| 235 | case ACPI_SRAT_TYPE_GENERIC_AFFINITY: | 
|---|
| 236 | { | 
|---|
| 237 | struct acpi_srat_generic_affinity *p = | 
|---|
| 238 | (struct acpi_srat_generic_affinity *)header; | 
|---|
| 239 |  | 
|---|
| 240 | if (p->device_handle_type == 0) { | 
|---|
| 241 | /* | 
|---|
| 242 | * For pci devices this may be the only place they | 
|---|
| 243 | * are assigned a proximity domain | 
|---|
| 244 | */ | 
|---|
| 245 | pr_debug( "SRAT Generic Initiator(Seg:%u BDF:%u) in proximity domain %d %s\n", | 
|---|
| 246 | *(u16 *)(&p->device_handle[0]), | 
|---|
| 247 | *(u16 *)(&p->device_handle[2]), | 
|---|
| 248 | p->proximity_domain, | 
|---|
| 249 | str_enabled_disabled(p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED)); | 
|---|
| 250 | } else { | 
|---|
| 251 | /* | 
|---|
| 252 | * In this case we can rely on the device having a | 
|---|
| 253 | * proximity domain reference | 
|---|
| 254 | */ | 
|---|
| 255 | pr_debug( "SRAT Generic Initiator(HID=%.8s UID=%.4s) in proximity domain %d %s\n", | 
|---|
| 256 | (char *)(&p->device_handle[0]), | 
|---|
| 257 | (char *)(&p->device_handle[8]), | 
|---|
| 258 | p->proximity_domain, | 
|---|
| 259 | str_enabled_disabled(p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED)); | 
|---|
| 260 | } | 
|---|
| 261 | } | 
|---|
| 262 | break; | 
|---|
| 263 |  | 
|---|
| 264 | case ACPI_SRAT_TYPE_RINTC_AFFINITY: | 
|---|
| 265 | { | 
|---|
| 266 | struct acpi_srat_rintc_affinity *p = | 
|---|
| 267 | (struct acpi_srat_rintc_affinity *)header; | 
|---|
| 268 | pr_debug( "SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n", | 
|---|
| 269 | p->acpi_processor_uid, | 
|---|
| 270 | p->proximity_domain, | 
|---|
| 271 | str_enabled_disabled(p->flags & ACPI_SRAT_RINTC_ENABLED)); | 
|---|
| 272 | } | 
|---|
| 273 | break; | 
|---|
| 274 |  | 
|---|
| 275 | default: | 
|---|
| 276 | pr_warn( "Found unsupported SRAT entry (type = 0x%x)\n", | 
|---|
| 277 | header->type); | 
|---|
| 278 | break; | 
|---|
| 279 | } | 
|---|
| 280 | } | 
|---|
| 281 |  | 
|---|
| 282 | /* | 
|---|
| 283 | * A lot of BIOS fill in 10 (= no distance) everywhere. This messes | 
|---|
| 284 | * up the NUMA heuristics which wants the local node to have a smaller | 
|---|
| 285 | * distance than the others. | 
|---|
| 286 | * Do some quick checks here and only use the SLIT if it passes. | 
|---|
| 287 | */ | 
|---|
| 288 | static int __init slit_valid(struct acpi_table_slit *slit) | 
|---|
| 289 | { | 
|---|
| 290 | int i, j; | 
|---|
| 291 | int d = slit->locality_count; | 
|---|
| 292 | for (i = 0; i < d; i++) { | 
|---|
| 293 | for (j = 0; j < d; j++) { | 
|---|
| 294 | u8 val = slit->entry[d*i + j]; | 
|---|
| 295 | if (i == j) { | 
|---|
| 296 | if (val != LOCAL_DISTANCE) | 
|---|
| 297 | return 0; | 
|---|
| 298 | } else if (val <= LOCAL_DISTANCE) | 
|---|
| 299 | return 0; | 
|---|
| 300 | } | 
|---|
| 301 | } | 
|---|
| 302 | return 1; | 
|---|
| 303 | } | 
|---|
| 304 |  | 
|---|
| 305 | void __init bad_srat(void) | 
|---|
| 306 | { | 
|---|
| 307 | pr_err( "SRAT: SRAT not used.\n"); | 
|---|
| 308 | disable_srat(); | 
|---|
| 309 | } | 
|---|
| 310 |  | 
|---|
| 311 | int __init srat_disabled(void) | 
|---|
| 312 | { | 
|---|
| 313 | return acpi_numa < 0; | 
|---|
| 314 | } | 
|---|
| 315 |  | 
|---|
| 316 | __weak int __init numa_fill_memblks(u64 start, u64 end) | 
|---|
| 317 | { | 
|---|
| 318 | return NUMA_NO_MEMBLK; | 
|---|
| 319 | } | 
|---|
| 320 |  | 
|---|
| 321 | /* | 
|---|
| 322 | * Callback for SLIT parsing.  pxm_to_node() returns NUMA_NO_NODE for | 
|---|
| 323 | * I/O localities since SRAT does not list them.  I/O localities are | 
|---|
| 324 | * not supported at this point. | 
|---|
| 325 | */ | 
|---|
| 326 | static int __init acpi_parse_slit(struct acpi_table_header *table) | 
|---|
| 327 | { | 
|---|
| 328 | struct acpi_table_slit *slit = (struct acpi_table_slit *)table; | 
|---|
| 329 | int i, j; | 
|---|
| 330 |  | 
|---|
| 331 | if (!slit_valid(slit)) { | 
|---|
| 332 | pr_info( "SLIT table looks invalid. Not used.\n"); | 
|---|
| 333 | return -EINVAL; | 
|---|
| 334 | } | 
|---|
| 335 |  | 
|---|
| 336 | for (i = 0; i < slit->locality_count; i++) { | 
|---|
| 337 | const int from_node = pxm_to_node(i); | 
|---|
| 338 |  | 
|---|
| 339 | if (from_node == NUMA_NO_NODE) | 
|---|
| 340 | continue; | 
|---|
| 341 |  | 
|---|
| 342 | for (j = 0; j < slit->locality_count; j++) { | 
|---|
| 343 | const int to_node = pxm_to_node(j); | 
|---|
| 344 |  | 
|---|
| 345 | if (to_node == NUMA_NO_NODE) | 
|---|
| 346 | continue; | 
|---|
| 347 |  | 
|---|
| 348 | numa_set_distance(from: from_node, to: to_node, | 
|---|
| 349 | distance: slit->entry[slit->locality_count * i + j]); | 
|---|
| 350 | } | 
|---|
| 351 | } | 
|---|
| 352 |  | 
|---|
| 353 | return 0; | 
|---|
| 354 | } | 
|---|
| 355 |  | 
|---|
| 356 | static int parsed_numa_memblks __initdata; | 
|---|
| 357 |  | 
|---|
| 358 | static int __init | 
|---|
| 359 | acpi_parse_memory_affinity(union acpi_subtable_headers *, | 
|---|
| 360 | const unsigned long table_end) | 
|---|
| 361 | { | 
|---|
| 362 | struct acpi_srat_mem_affinity *ma; | 
|---|
| 363 | u64 start, end; | 
|---|
| 364 | u32 hotpluggable; | 
|---|
| 365 | int node, pxm; | 
|---|
| 366 |  | 
|---|
| 367 | ma = (struct acpi_srat_mem_affinity *)header; | 
|---|
| 368 |  | 
|---|
| 369 | acpi_table_print_srat_entry(header: &header->common); | 
|---|
| 370 |  | 
|---|
| 371 | if (srat_disabled()) | 
|---|
| 372 | return 0; | 
|---|
| 373 | if (ma->header.length < sizeof(struct acpi_srat_mem_affinity)) { | 
|---|
| 374 | pr_err( "SRAT: Unexpected header length: %d\n", | 
|---|
| 375 | ma->header.length); | 
|---|
| 376 | goto out_err_bad_srat; | 
|---|
| 377 | } | 
|---|
| 378 | if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) | 
|---|
| 379 | return 0; | 
|---|
| 380 | hotpluggable = IS_ENABLED(CONFIG_MEMORY_HOTPLUG) && | 
|---|
| 381 | (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE); | 
|---|
| 382 |  | 
|---|
| 383 | start = ma->base_address; | 
|---|
| 384 | end = start + ma->length; | 
|---|
| 385 | pxm = ma->proximity_domain; | 
|---|
| 386 | if (acpi_srat_revision <= 1) | 
|---|
| 387 | pxm &= 0xff; | 
|---|
| 388 |  | 
|---|
| 389 | node = acpi_map_pxm_to_node(pxm); | 
|---|
| 390 | if (node == NUMA_NO_NODE) { | 
|---|
| 391 | pr_err( "SRAT: Too many proximity domains.\n"); | 
|---|
| 392 | goto out_err_bad_srat; | 
|---|
| 393 | } | 
|---|
| 394 |  | 
|---|
| 395 | if (numa_add_memblk(nodeid: node, start, end) < 0) { | 
|---|
| 396 | pr_err( "SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n", | 
|---|
| 397 | node, (unsigned long long) start, | 
|---|
| 398 | (unsigned long long) end - 1); | 
|---|
| 399 | goto out_err_bad_srat; | 
|---|
| 400 | } | 
|---|
| 401 |  | 
|---|
| 402 | node_set(node, numa_nodes_parsed); | 
|---|
| 403 |  | 
|---|
| 404 | pr_info( "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", | 
|---|
| 405 | node, pxm, | 
|---|
| 406 | (unsigned long long) start, (unsigned long long) end - 1, | 
|---|
| 407 | hotpluggable ? " hotplug": "", | 
|---|
| 408 | ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile": ""); | 
|---|
| 409 |  | 
|---|
| 410 | /* Mark hotplug range in memblock. */ | 
|---|
| 411 | if (hotpluggable && memblock_mark_hotplug(base: start, size: ma->length)) | 
|---|
| 412 | pr_warn( "SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", | 
|---|
| 413 | (unsigned long long)start, (unsigned long long)end - 1); | 
|---|
| 414 |  | 
|---|
| 415 | max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1)); | 
|---|
| 416 |  | 
|---|
| 417 | parsed_numa_memblks++; | 
|---|
| 418 |  | 
|---|
| 419 | return 0; | 
|---|
| 420 |  | 
|---|
| 421 | out_err_bad_srat: | 
|---|
| 422 | /* Just disable SRAT, but do not fail and ignore errors. */ | 
|---|
| 423 | bad_srat(); | 
|---|
| 424 |  | 
|---|
| 425 | return 0; | 
|---|
| 426 | } | 
|---|
| 427 |  | 
|---|
| 428 | static int __init acpi_parse_cfmws(union acpi_subtable_headers *, | 
|---|
| 429 | void *arg, const unsigned long table_end) | 
|---|
| 430 | { | 
|---|
| 431 | struct acpi_cedt_cfmws *cfmws; | 
|---|
| 432 | int *fake_pxm = arg; | 
|---|
| 433 | u64 start, end, align; | 
|---|
| 434 | int node; | 
|---|
| 435 | int err; | 
|---|
| 436 |  | 
|---|
| 437 | cfmws = (struct acpi_cedt_cfmws *)header; | 
|---|
| 438 | start = cfmws->base_hpa; | 
|---|
| 439 | end = cfmws->base_hpa + cfmws->window_size; | 
|---|
| 440 |  | 
|---|
| 441 | /* Align memblock size to CFMW regions if possible */ | 
|---|
| 442 | align = 1UL << __ffs(start | end); | 
|---|
| 443 | if (align >= SZ_256M) { | 
|---|
| 444 | err = memory_block_advise_max_size(size: align); | 
|---|
| 445 | if (err) | 
|---|
| 446 | pr_warn( "CFMWS: memblock size advise failed (%d)\n", err); | 
|---|
| 447 | } else | 
|---|
| 448 | pr_err( "CFMWS: [BIOS BUG] base/size alignment violates spec\n"); | 
|---|
| 449 |  | 
|---|
| 450 | /* | 
|---|
| 451 | * The SRAT may have already described NUMA details for all, | 
|---|
| 452 | * or a portion of, this CFMWS HPA range. Extend the memblks | 
|---|
| 453 | * found for any portion of the window to cover the entire | 
|---|
| 454 | * window. | 
|---|
| 455 | */ | 
|---|
| 456 | if (!numa_fill_memblks(start, end)) | 
|---|
| 457 | return 0; | 
|---|
| 458 |  | 
|---|
| 459 | /* No SRAT description. Create a new node. */ | 
|---|
| 460 | node = acpi_map_pxm_to_node(*fake_pxm); | 
|---|
| 461 |  | 
|---|
| 462 | if (node == NUMA_NO_NODE) { | 
|---|
| 463 | pr_err( "ACPI NUMA: Too many proximity domains while processing CFMWS.\n"); | 
|---|
| 464 | return -EINVAL; | 
|---|
| 465 | } | 
|---|
| 466 |  | 
|---|
| 467 | if (numa_add_reserved_memblk(nid: node, start, end) < 0) { | 
|---|
| 468 | /* CXL driver must handle the NUMA_NO_NODE case */ | 
|---|
| 469 | pr_warn( "ACPI NUMA: Failed to add memblk for CFMWS node %d [mem %#llx-%#llx]\n", | 
|---|
| 470 | node, start, end); | 
|---|
| 471 | } | 
|---|
| 472 | node_set(node, numa_nodes_parsed); | 
|---|
| 473 |  | 
|---|
| 474 | /* Set the next available fake_pxm value */ | 
|---|
| 475 | (*fake_pxm)++; | 
|---|
| 476 | return 0; | 
|---|
| 477 | } | 
|---|
| 478 |  | 
|---|
| 479 | void __init __weak | 
|---|
| 480 | acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | 
|---|
| 481 | { | 
|---|
| 482 | pr_warn( "Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id); | 
|---|
| 483 | } | 
|---|
| 484 |  | 
|---|
| 485 | static int __init | 
|---|
| 486 | acpi_parse_x2apic_affinity(union acpi_subtable_headers *, | 
|---|
| 487 | const unsigned long end) | 
|---|
| 488 | { | 
|---|
| 489 | struct acpi_srat_x2apic_cpu_affinity *processor_affinity; | 
|---|
| 490 |  | 
|---|
| 491 | processor_affinity = (struct acpi_srat_x2apic_cpu_affinity *)header; | 
|---|
| 492 |  | 
|---|
| 493 | acpi_table_print_srat_entry(header: &header->common); | 
|---|
| 494 |  | 
|---|
| 495 | /* let architecture-dependent part to do it */ | 
|---|
| 496 | acpi_numa_x2apic_affinity_init(pa: processor_affinity); | 
|---|
| 497 |  | 
|---|
| 498 | return 0; | 
|---|
| 499 | } | 
|---|
| 500 |  | 
|---|
| 501 | static int __init | 
|---|
| 502 | acpi_parse_processor_affinity(union acpi_subtable_headers *, | 
|---|
| 503 | const unsigned long end) | 
|---|
| 504 | { | 
|---|
| 505 | struct acpi_srat_cpu_affinity *processor_affinity; | 
|---|
| 506 |  | 
|---|
| 507 | processor_affinity = (struct acpi_srat_cpu_affinity *)header; | 
|---|
| 508 |  | 
|---|
| 509 | acpi_table_print_srat_entry(header: &header->common); | 
|---|
| 510 |  | 
|---|
| 511 | /* let architecture-dependent part to do it */ | 
|---|
| 512 | acpi_numa_processor_affinity_init(pa: processor_affinity); | 
|---|
| 513 |  | 
|---|
| 514 | return 0; | 
|---|
| 515 | } | 
|---|
| 516 |  | 
|---|
| 517 | static int __init | 
|---|
| 518 | acpi_parse_gicc_affinity(union acpi_subtable_headers *, | 
|---|
| 519 | const unsigned long end) | 
|---|
| 520 | { | 
|---|
| 521 | struct acpi_srat_gicc_affinity *processor_affinity; | 
|---|
| 522 |  | 
|---|
| 523 | processor_affinity = (struct acpi_srat_gicc_affinity *)header; | 
|---|
| 524 |  | 
|---|
| 525 | acpi_table_print_srat_entry(header: &header->common); | 
|---|
| 526 |  | 
|---|
| 527 | /* let architecture-dependent part to do it */ | 
|---|
| 528 | acpi_numa_gicc_affinity_init(pa: processor_affinity); | 
|---|
| 529 |  | 
|---|
| 530 | return 0; | 
|---|
| 531 | } | 
|---|
| 532 |  | 
|---|
| 533 | #if defined(CONFIG_X86) || defined(CONFIG_ARM64) | 
|---|
| 534 | static int __init | 
|---|
| 535 | acpi_parse_gi_affinity(union acpi_subtable_headers *, | 
|---|
| 536 | const unsigned long end) | 
|---|
| 537 | { | 
|---|
| 538 | struct acpi_srat_generic_affinity *gi_affinity; | 
|---|
| 539 | int node; | 
|---|
| 540 |  | 
|---|
| 541 | gi_affinity = (struct acpi_srat_generic_affinity *)header; | 
|---|
| 542 | if (!gi_affinity) | 
|---|
| 543 | return -EINVAL; | 
|---|
| 544 | acpi_table_print_srat_entry(header: &header->common); | 
|---|
| 545 |  | 
|---|
| 546 | if (!(gi_affinity->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED)) | 
|---|
| 547 | return -EINVAL; | 
|---|
| 548 |  | 
|---|
| 549 | node = acpi_map_pxm_to_node(gi_affinity->proximity_domain); | 
|---|
| 550 | if (node == NUMA_NO_NODE) { | 
|---|
| 551 | pr_err( "SRAT: Too many proximity domains.\n"); | 
|---|
| 552 | return -EINVAL; | 
|---|
| 553 | } | 
|---|
| 554 | node_set(node, numa_nodes_parsed); | 
|---|
| 555 | node_set_state(node, state: N_GENERIC_INITIATOR); | 
|---|
| 556 |  | 
|---|
| 557 | return 0; | 
|---|
| 558 | } | 
|---|
| 559 | #else | 
|---|
| 560 | static int __init | 
|---|
| 561 | acpi_parse_gi_affinity(union acpi_subtable_headers *header, | 
|---|
| 562 | const unsigned long end) | 
|---|
| 563 | { | 
|---|
| 564 | return 0; | 
|---|
| 565 | } | 
|---|
| 566 | #endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */ | 
|---|
| 567 |  | 
|---|
| 568 | static int __init | 
|---|
| 569 | acpi_parse_rintc_affinity(union acpi_subtable_headers *, | 
|---|
| 570 | const unsigned long end) | 
|---|
| 571 | { | 
|---|
| 572 | struct acpi_srat_rintc_affinity *rintc_affinity; | 
|---|
| 573 |  | 
|---|
| 574 | rintc_affinity = (struct acpi_srat_rintc_affinity *)header; | 
|---|
| 575 | acpi_table_print_srat_entry(header: &header->common); | 
|---|
| 576 |  | 
|---|
| 577 | /* let architecture-dependent part to do it */ | 
|---|
| 578 | acpi_numa_rintc_affinity_init(pa: rintc_affinity); | 
|---|
| 579 |  | 
|---|
| 580 | return 0; | 
|---|
| 581 | } | 
|---|
| 582 |  | 
|---|
| 583 | static int __init acpi_parse_srat(struct acpi_table_header *table) | 
|---|
| 584 | { | 
|---|
| 585 | struct acpi_table_srat *srat = (struct acpi_table_srat *)table; | 
|---|
| 586 |  | 
|---|
| 587 | acpi_srat_revision = srat->header.revision; | 
|---|
| 588 |  | 
|---|
| 589 | /* Real work done in acpi_table_parse_srat below. */ | 
|---|
| 590 |  | 
|---|
| 591 | return 0; | 
|---|
| 592 | } | 
|---|
| 593 |  | 
|---|
| 594 | static int __init | 
|---|
| 595 | acpi_table_parse_srat(enum acpi_srat_type id, | 
|---|
| 596 | acpi_tbl_entry_handler handler, unsigned int max_entries) | 
|---|
| 597 | { | 
|---|
| 598 | return acpi_table_parse_entries(ACPI_SIG_SRAT, | 
|---|
| 599 | table_size: sizeof(struct acpi_table_srat), entry_id: id, | 
|---|
| 600 | handler, max_entries); | 
|---|
| 601 | } | 
|---|
| 602 |  | 
|---|
| 603 | int __init acpi_numa_init(void) | 
|---|
| 604 | { | 
|---|
| 605 | int i, fake_pxm, cnt = 0; | 
|---|
| 606 |  | 
|---|
| 607 | if (acpi_disabled) | 
|---|
| 608 | return -EINVAL; | 
|---|
| 609 |  | 
|---|
| 610 | /* | 
|---|
| 611 | * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= | 
|---|
| 612 | * SRAT cpu entries could have different order with that in MADT. | 
|---|
| 613 | * So go over all cpu entries in SRAT to get apicid to node mapping. | 
|---|
| 614 | */ | 
|---|
| 615 |  | 
|---|
| 616 | /* SRAT: System Resource Affinity Table */ | 
|---|
| 617 | if (!acpi_table_parse(ACPI_SIG_SRAT, handler: acpi_parse_srat)) { | 
|---|
| 618 | struct acpi_subtable_proc srat_proc[5]; | 
|---|
| 619 |  | 
|---|
| 620 | memset(s: srat_proc, c: 0, n: sizeof(srat_proc)); | 
|---|
| 621 | srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY; | 
|---|
| 622 | srat_proc[0].handler = acpi_parse_processor_affinity; | 
|---|
| 623 | srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY; | 
|---|
| 624 | srat_proc[1].handler = acpi_parse_x2apic_affinity; | 
|---|
| 625 | srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY; | 
|---|
| 626 | srat_proc[2].handler = acpi_parse_gicc_affinity; | 
|---|
| 627 | srat_proc[3].id = ACPI_SRAT_TYPE_GENERIC_AFFINITY; | 
|---|
| 628 | srat_proc[3].handler = acpi_parse_gi_affinity; | 
|---|
| 629 | srat_proc[4].id = ACPI_SRAT_TYPE_RINTC_AFFINITY; | 
|---|
| 630 | srat_proc[4].handler = acpi_parse_rintc_affinity; | 
|---|
| 631 |  | 
|---|
| 632 | acpi_table_parse_entries_array(ACPI_SIG_SRAT, | 
|---|
| 633 | table_size: sizeof(struct acpi_table_srat), | 
|---|
| 634 | proc: srat_proc, ARRAY_SIZE(srat_proc), max_entries: 0); | 
|---|
| 635 |  | 
|---|
| 636 | cnt = acpi_table_parse_srat(id: ACPI_SRAT_TYPE_MEMORY_AFFINITY, | 
|---|
| 637 | handler: acpi_parse_memory_affinity, max_entries: 0); | 
|---|
| 638 | } | 
|---|
| 639 |  | 
|---|
| 640 | /* SLIT: System Locality Information Table */ | 
|---|
| 641 | acpi_table_parse(ACPI_SIG_SLIT, handler: acpi_parse_slit); | 
|---|
| 642 |  | 
|---|
| 643 | /* | 
|---|
| 644 | * CXL Fixed Memory Window Structures (CFMWS) must be parsed | 
|---|
| 645 | * after the SRAT. Create NUMA Nodes for CXL memory ranges that | 
|---|
| 646 | * are defined in the CFMWS and not already defined in the SRAT. | 
|---|
| 647 | * Initialize a fake_pxm as the first available PXM to emulate. | 
|---|
| 648 | */ | 
|---|
| 649 |  | 
|---|
| 650 | /* fake_pxm is the next unused PXM value after SRAT parsing */ | 
|---|
| 651 | for (i = 0, fake_pxm = -1; i < MAX_NUMNODES; i++) { | 
|---|
| 652 | if (node_to_pxm_map[i] > fake_pxm) | 
|---|
| 653 | fake_pxm = node_to_pxm_map[i]; | 
|---|
| 654 | } | 
|---|
| 655 | last_real_pxm = fake_pxm; | 
|---|
| 656 | fake_pxm++; | 
|---|
| 657 | acpi_table_parse_cedt(id: ACPI_CEDT_TYPE_CFMWS, handler_arg: acpi_parse_cfmws, | 
|---|
| 658 | arg: &fake_pxm); | 
|---|
| 659 |  | 
|---|
| 660 | if (cnt < 0) | 
|---|
| 661 | return cnt; | 
|---|
| 662 | else if (!parsed_numa_memblks) | 
|---|
| 663 | return -ENOENT; | 
|---|
| 664 | return 0; | 
|---|
| 665 | } | 
|---|
| 666 |  | 
|---|
| 667 | bool acpi_node_backed_by_real_pxm(int nid) | 
|---|
| 668 | { | 
|---|
| 669 | int pxm = node_to_pxm(nid); | 
|---|
| 670 |  | 
|---|
| 671 | return pxm <= last_real_pxm; | 
|---|
| 672 | } | 
|---|
| 673 | EXPORT_SYMBOL_GPL(acpi_node_backed_by_real_pxm); | 
|---|
| 674 |  | 
|---|
| 675 | static int acpi_get_pxm(acpi_handle h) | 
|---|
| 676 | { | 
|---|
| 677 | unsigned long long pxm; | 
|---|
| 678 | acpi_status status; | 
|---|
| 679 | acpi_handle handle; | 
|---|
| 680 | acpi_handle phandle = h; | 
|---|
| 681 |  | 
|---|
| 682 | do { | 
|---|
| 683 | handle = phandle; | 
|---|
| 684 | status = acpi_evaluate_integer(handle, pathname: "_PXM", NULL, data: &pxm); | 
|---|
| 685 | if (ACPI_SUCCESS(status)) | 
|---|
| 686 | return pxm; | 
|---|
| 687 | status = acpi_get_parent(object: handle, out_handle: &phandle); | 
|---|
| 688 | } while (ACPI_SUCCESS(status)); | 
|---|
| 689 | return -1; | 
|---|
| 690 | } | 
|---|
| 691 |  | 
|---|
| 692 | int acpi_get_node(acpi_handle handle) | 
|---|
| 693 | { | 
|---|
| 694 | int pxm; | 
|---|
| 695 |  | 
|---|
| 696 | pxm = acpi_get_pxm(h: handle); | 
|---|
| 697 |  | 
|---|
| 698 | return pxm_to_node(pxm); | 
|---|
| 699 | } | 
|---|
| 700 | EXPORT_SYMBOL(acpi_get_node); | 
|---|
| 701 |  | 
|---|