| 1 | // SPDX-License-Identifier: GPL-2.0-only | 
|---|
| 2 | /* | 
|---|
| 3 | * HyperV  Detection code. | 
|---|
| 4 | * | 
|---|
| 5 | * Copyright (C) 2010, Novell, Inc. | 
|---|
| 6 | * Author : K. Y. Srinivasan <ksrinivasan@novell.com> | 
|---|
| 7 | */ | 
|---|
| 8 |  | 
|---|
| 9 | #include <linux/types.h> | 
|---|
| 10 | #include <linux/time.h> | 
|---|
| 11 | #include <linux/clocksource.h> | 
|---|
| 12 | #include <linux/init.h> | 
|---|
| 13 | #include <linux/export.h> | 
|---|
| 14 | #include <linux/hardirq.h> | 
|---|
| 15 | #include <linux/efi.h> | 
|---|
| 16 | #include <linux/interrupt.h> | 
|---|
| 17 | #include <linux/irq.h> | 
|---|
| 18 | #include <linux/kexec.h> | 
|---|
| 19 | #include <linux/random.h> | 
|---|
| 20 | #include <asm/processor.h> | 
|---|
| 21 | #include <asm/hypervisor.h> | 
|---|
| 22 | #include <hyperv/hvhdk.h> | 
|---|
| 23 | #include <asm/mshyperv.h> | 
|---|
| 24 | #include <asm/desc.h> | 
|---|
| 25 | #include <asm/idtentry.h> | 
|---|
| 26 | #include <asm/irq_regs.h> | 
|---|
| 27 | #include <asm/i8259.h> | 
|---|
| 28 | #include <asm/apic.h> | 
|---|
| 29 | #include <asm/timer.h> | 
|---|
| 30 | #include <asm/reboot.h> | 
|---|
| 31 | #include <asm/nmi.h> | 
|---|
| 32 | #include <clocksource/hyperv_timer.h> | 
|---|
| 33 | #include <asm/msr.h> | 
|---|
| 34 | #include <asm/numa.h> | 
|---|
| 35 | #include <asm/svm.h> | 
|---|
| 36 |  | 
|---|
| 37 | /* Is Linux running on nested Microsoft Hypervisor */ | 
|---|
| 38 | bool hv_nested; | 
|---|
| 39 | struct ms_hyperv_info ms_hyperv; | 
|---|
| 40 |  | 
|---|
| 41 | #if IS_ENABLED(CONFIG_HYPERV) | 
|---|
| 42 | static inline unsigned int hv_get_nested_msr(unsigned int reg) | 
|---|
| 43 | { | 
|---|
| 44 | if (hv_is_sint_msr(reg)) | 
|---|
| 45 | return reg - HV_X64_MSR_SINT0 + HV_X64_MSR_NESTED_SINT0; | 
|---|
| 46 |  | 
|---|
| 47 | switch (reg) { | 
|---|
| 48 | case HV_X64_MSR_SIMP: | 
|---|
| 49 | return HV_X64_MSR_NESTED_SIMP; | 
|---|
| 50 | case HV_X64_MSR_SIEFP: | 
|---|
| 51 | return HV_X64_MSR_NESTED_SIEFP; | 
|---|
| 52 | case HV_X64_MSR_SVERSION: | 
|---|
| 53 | return HV_X64_MSR_NESTED_SVERSION; | 
|---|
| 54 | case HV_X64_MSR_SCONTROL: | 
|---|
| 55 | return HV_X64_MSR_NESTED_SCONTROL; | 
|---|
| 56 | case HV_X64_MSR_EOM: | 
|---|
| 57 | return HV_X64_MSR_NESTED_EOM; | 
|---|
| 58 | default: | 
|---|
| 59 | return reg; | 
|---|
| 60 | } | 
|---|
| 61 | } | 
|---|
| 62 |  | 
|---|
| 63 | u64 hv_get_non_nested_msr(unsigned int reg) | 
|---|
| 64 | { | 
|---|
| 65 | u64 value; | 
|---|
| 66 |  | 
|---|
| 67 | if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) | 
|---|
| 68 | hv_ivm_msr_read(reg, &value); | 
|---|
| 69 | else | 
|---|
| 70 | rdmsrq(reg, value); | 
|---|
| 71 | return value; | 
|---|
| 72 | } | 
|---|
| 73 | EXPORT_SYMBOL_GPL(hv_get_non_nested_msr); | 
|---|
| 74 |  | 
|---|
| 75 | void hv_set_non_nested_msr(unsigned int reg, u64 value) | 
|---|
| 76 | { | 
|---|
| 77 | if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) { | 
|---|
| 78 | hv_ivm_msr_write(reg, value); | 
|---|
| 79 |  | 
|---|
| 80 | /* Write proxy bit via wrmsl instruction */ | 
|---|
| 81 | if (hv_is_sint_msr(reg)) | 
|---|
| 82 | wrmsrq(reg, value | 1 << 20); | 
|---|
| 83 | } else { | 
|---|
| 84 | wrmsrq(reg, value); | 
|---|
| 85 | } | 
|---|
| 86 | } | 
|---|
| 87 | EXPORT_SYMBOL_GPL(hv_set_non_nested_msr); | 
|---|
| 88 |  | 
|---|
| 89 | u64 hv_get_msr(unsigned int reg) | 
|---|
| 90 | { | 
|---|
| 91 | if (hv_nested) | 
|---|
| 92 | reg = hv_get_nested_msr(reg); | 
|---|
| 93 |  | 
|---|
| 94 | return hv_get_non_nested_msr(reg); | 
|---|
| 95 | } | 
|---|
| 96 | EXPORT_SYMBOL_GPL(hv_get_msr); | 
|---|
| 97 |  | 
|---|
| 98 | void hv_set_msr(unsigned int reg, u64 value) | 
|---|
| 99 | { | 
|---|
| 100 | if (hv_nested) | 
|---|
| 101 | reg = hv_get_nested_msr(reg); | 
|---|
| 102 |  | 
|---|
| 103 | hv_set_non_nested_msr(reg, value); | 
|---|
| 104 | } | 
|---|
| 105 | EXPORT_SYMBOL_GPL(hv_set_msr); | 
|---|
| 106 |  | 
|---|
/*
 * Optional callbacks installed through the hv_setup_*_handler() helpers.
 * Each pointer is NULL until a handler is set and is checked before use.
 */
static void (*mshv_handler)(void);
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
static void (*hv_crash_handler)(struct pt_regs *regs);
|---|
| 112 |  | 
|---|
| 113 | DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback) | 
|---|
| 114 | { | 
|---|
| 115 | struct pt_regs *old_regs = set_irq_regs(regs); | 
|---|
| 116 |  | 
|---|
| 117 | inc_irq_stat(irq_hv_callback_count); | 
|---|
| 118 | if (mshv_handler) | 
|---|
| 119 | mshv_handler(); | 
|---|
| 120 |  | 
|---|
| 121 | if (vmbus_handler) | 
|---|
| 122 | vmbus_handler(); | 
|---|
| 123 |  | 
|---|
| 124 | if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED) | 
|---|
| 125 | apic_eoi(); | 
|---|
| 126 |  | 
|---|
| 127 | set_irq_regs(old_regs); | 
|---|
| 128 | } | 
|---|
| 129 |  | 
|---|
| 130 | void hv_setup_mshv_handler(void (*handler)(void)) | 
|---|
| 131 | { | 
|---|
| 132 | mshv_handler = handler; | 
|---|
| 133 | } | 
|---|
| 134 |  | 
|---|
| 135 | void hv_setup_vmbus_handler(void (*handler)(void)) | 
|---|
| 136 | { | 
|---|
| 137 | vmbus_handler = handler; | 
|---|
| 138 | } | 
|---|
| 139 |  | 
|---|
| 140 | void hv_remove_vmbus_handler(void) | 
|---|
| 141 | { | 
|---|
| 142 | /* We have no way to deallocate the interrupt gate */ | 
|---|
| 143 | vmbus_handler = NULL; | 
|---|
| 144 | } | 
|---|
| 145 |  | 
|---|
| 146 | /* | 
|---|
| 147 | * Routines to do per-architecture handling of stimer0 | 
|---|
| 148 | * interrupts when in Direct Mode | 
|---|
| 149 | */ | 
|---|
| 150 | DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) | 
|---|
| 151 | { | 
|---|
| 152 | struct pt_regs *old_regs = set_irq_regs(regs); | 
|---|
| 153 |  | 
|---|
| 154 | inc_irq_stat(hyperv_stimer0_count); | 
|---|
| 155 | if (hv_stimer0_handler) | 
|---|
| 156 | hv_stimer0_handler(); | 
|---|
| 157 | add_interrupt_randomness(HYPERV_STIMER0_VECTOR); | 
|---|
| 158 | apic_eoi(); | 
|---|
| 159 |  | 
|---|
| 160 | set_irq_regs(old_regs); | 
|---|
| 161 | } | 
|---|
| 162 |  | 
|---|
| 163 | /* For x86/x64, override weak placeholders in hyperv_timer.c */ | 
|---|
| 164 | void hv_setup_stimer0_handler(void (*handler)(void)) | 
|---|
| 165 | { | 
|---|
| 166 | hv_stimer0_handler = handler; | 
|---|
| 167 | } | 
|---|
| 168 |  | 
|---|
| 169 | void hv_remove_stimer0_handler(void) | 
|---|
| 170 | { | 
|---|
| 171 | /* We have no way to deallocate the interrupt gate */ | 
|---|
| 172 | hv_stimer0_handler = NULL; | 
|---|
| 173 | } | 
|---|
| 174 |  | 
|---|
| 175 | void hv_setup_kexec_handler(void (*handler)(void)) | 
|---|
| 176 | { | 
|---|
| 177 | hv_kexec_handler = handler; | 
|---|
| 178 | } | 
|---|
| 179 |  | 
|---|
| 180 | void hv_remove_kexec_handler(void) | 
|---|
| 181 | { | 
|---|
| 182 | hv_kexec_handler = NULL; | 
|---|
| 183 | } | 
|---|
| 184 |  | 
|---|
| 185 | void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)) | 
|---|
| 186 | { | 
|---|
| 187 | hv_crash_handler = handler; | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | void hv_remove_crash_handler(void) | 
|---|
| 191 | { | 
|---|
| 192 | hv_crash_handler = NULL; | 
|---|
| 193 | } | 
|---|
| 194 |  | 
|---|
| 195 | #ifdef CONFIG_KEXEC_CORE | 
|---|
| 196 | static void hv_machine_shutdown(void) | 
|---|
| 197 | { | 
|---|
| 198 | if (kexec_in_progress && hv_kexec_handler) | 
|---|
| 199 | hv_kexec_handler(); | 
|---|
| 200 |  | 
|---|
| 201 | /* | 
|---|
| 202 | * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor | 
|---|
| 203 | * corrupts the old VP Assist Pages and can crash the kexec kernel. | 
|---|
| 204 | */ | 
|---|
| 205 | if (kexec_in_progress) | 
|---|
| 206 | cpuhp_remove_state(CPUHP_AP_HYPERV_ONLINE); | 
|---|
| 207 |  | 
|---|
| 208 | /* The function calls stop_other_cpus(). */ | 
|---|
| 209 | native_machine_shutdown(); | 
|---|
| 210 |  | 
|---|
| 211 | /* Disable the hypercall page when there is only 1 active CPU. */ | 
|---|
| 212 | if (kexec_in_progress) | 
|---|
| 213 | hyperv_cleanup(); | 
|---|
| 214 | } | 
|---|
| 215 | #endif /* CONFIG_KEXEC_CORE */ | 
|---|
| 216 |  | 
|---|
| 217 | #ifdef CONFIG_CRASH_DUMP | 
|---|
| 218 | static void hv_machine_crash_shutdown(struct pt_regs *regs) | 
|---|
| 219 | { | 
|---|
| 220 | if (hv_crash_handler) | 
|---|
| 221 | hv_crash_handler(regs); | 
|---|
| 222 |  | 
|---|
| 223 | /* The function calls crash_smp_send_stop(). */ | 
|---|
| 224 | native_machine_crash_shutdown(regs); | 
|---|
| 225 |  | 
|---|
| 226 | /* Disable the hypercall page when there is only 1 active CPU. */ | 
|---|
| 227 | hyperv_cleanup(); | 
|---|
| 228 | } | 
|---|
| 229 | #endif /* CONFIG_CRASH_DUMP */ | 
|---|
| 230 |  | 
|---|
| 231 | static u64 hv_ref_counter_at_suspend; | 
|---|
| 232 | static void (*old_save_sched_clock_state)(void); | 
|---|
| 233 | static void (*old_restore_sched_clock_state)(void); | 
|---|
| 234 |  | 
|---|
| 235 | /* | 
|---|
| 236 | * Hyper-V clock counter resets during hibernation. Save and restore clock | 
|---|
| 237 | * offset during suspend/resume, while also considering the time passed | 
|---|
| 238 | * before suspend. This is to make sure that sched_clock using hv tsc page | 
|---|
| 239 | * based clocksource, proceeds from where it left off during suspend and | 
|---|
| 240 | * it shows correct time for the timestamps of kernel messages after resume. | 
|---|
| 241 | */ | 
|---|
| 242 | static void save_hv_clock_tsc_state(void) | 
|---|
| 243 | { | 
|---|
| 244 | hv_ref_counter_at_suspend = hv_read_reference_counter(); | 
|---|
| 245 | } | 
|---|
| 246 |  | 
|---|
| 247 | static void restore_hv_clock_tsc_state(void) | 
|---|
| 248 | { | 
|---|
| 249 | /* | 
|---|
| 250 | * Adjust the offsets used by hv tsc clocksource to | 
|---|
| 251 | * account for the time spent before hibernation. | 
|---|
| 252 | * adjusted value = reference counter (time) at suspend | 
|---|
| 253 | *                - reference counter (time) now. | 
|---|
| 254 | */ | 
|---|
| 255 | hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter()); | 
|---|
| 256 | } | 
|---|
| 257 |  | 
|---|
/*
 * Replacement for x86_platform.save_sched_clock_state. The Hyper-V clock
 * counter is reset across suspend/resume, so the offset used to measure
 * time has to be corrected after resume.
 *
 * Runs the previously installed hook first, then records the Hyper-V
 * reference counter.
 */
static void hv_save_sched_clock_state(void)
{
	/* Chain to the hook saved by x86_setup_ops_for_tsc_pg_clock(). */
	old_save_sched_clock_state();

	save_hv_clock_tsc_state();
}
|---|
| 269 |  | 
|---|
/*
 * Replacement for x86_platform.restore_sched_clock_state: applies the
 * Hyper-V offset correction first, then runs the previously installed hook.
 */
static void hv_restore_sched_clock_state(void)
{
	restore_hv_clock_tsc_state();

	/* Chain to the hook saved by x86_setup_ops_for_tsc_pg_clock(). */
	old_restore_sched_clock_state();
}
|---|
| 275 |  | 
|---|
| 276 | static void __init x86_setup_ops_for_tsc_pg_clock(void) | 
|---|
| 277 | { | 
|---|
| 278 | if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) | 
|---|
| 279 | return; | 
|---|
| 280 |  | 
|---|
| 281 | old_save_sched_clock_state = x86_platform.save_sched_clock_state; | 
|---|
| 282 | x86_platform.save_sched_clock_state = hv_save_sched_clock_state; | 
|---|
| 283 |  | 
|---|
| 284 | old_restore_sched_clock_state = x86_platform.restore_sched_clock_state; | 
|---|
| 285 | x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state; | 
|---|
| 286 | } | 
|---|
| 287 |  | 
|---|
| 288 | #ifdef CONFIG_X86_64 | 
|---|
/* Static call used for hypercalls; hv_std_hypercall is the initial target. */
DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall);
/* Retarget the hv_hypercall static call to @hc. */
#define hypercall_update(hc) static_call_update(hv_hypercall, hc)
|---|
| 292 | #endif | 
|---|
| 293 | #endif /* CONFIG_HYPERV */ | 
|---|
| 294 |  | 
|---|
/*
 * Fallback used when no static-call based hypercall_update() was defined
 * above: evaluate the argument and discard the result. The argument and
 * the whole expansion are parenthesized so the cast applies to the entire
 * expression regardless of what the caller passes.
 */
#ifndef hypercall_update
#define hypercall_update(hc) ((void)(hc))
#endif
|---|
| 298 |  | 
|---|
| 299 | static uint32_t  __init ms_hyperv_platform(void) | 
|---|
| 300 | { | 
|---|
| 301 | u32 eax; | 
|---|
| 302 | u32 hyp_signature[3]; | 
|---|
| 303 |  | 
|---|
| 304 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) | 
|---|
| 305 | return 0; | 
|---|
| 306 |  | 
|---|
| 307 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, | 
|---|
| 308 | eax: &eax, ebx: &hyp_signature[0], ecx: &hyp_signature[1], edx: &hyp_signature[2]); | 
|---|
| 309 |  | 
|---|
| 310 | if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX || | 
|---|
| 311 | memcmp( "Microsoft Hv", hyp_signature, 12)) | 
|---|
| 312 | return 0; | 
|---|
| 313 |  | 
|---|
| 314 | /* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */ | 
|---|
| 315 | eax = cpuid_eax(HYPERV_CPUID_FEATURES); | 
|---|
| 316 | if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) { | 
|---|
| 317 | pr_warn( "x86/hyperv: HYPERCALL MSR not available.\n"); | 
|---|
| 318 | return 0; | 
|---|
| 319 | } | 
|---|
| 320 | if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) { | 
|---|
| 321 | pr_warn( "x86/hyperv: VP_INDEX MSR not available.\n"); | 
|---|
| 322 | return 0; | 
|---|
| 323 | } | 
|---|
| 324 |  | 
|---|
| 325 | return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS; | 
|---|
| 326 | } | 
|---|
| 327 |  | 
|---|
| 328 | #ifdef CONFIG_X86_LOCAL_APIC | 
|---|
| 329 | /* | 
|---|
| 330 | * Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes | 
|---|
| 331 | * it difficult to process CHANNELMSG_UNLOAD in case of crash. Handle | 
|---|
| 332 | * unknown NMI on the first CPU which gets it. | 
|---|
| 333 | */ | 
|---|
| 334 | static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs) | 
|---|
| 335 | { | 
|---|
| 336 | static atomic_t nmi_cpu = ATOMIC_INIT(-1); | 
|---|
| 337 | unsigned int old_cpu, this_cpu; | 
|---|
| 338 |  | 
|---|
| 339 | if (!unknown_nmi_panic) | 
|---|
| 340 | return NMI_DONE; | 
|---|
| 341 |  | 
|---|
| 342 | old_cpu = -1; | 
|---|
| 343 | this_cpu = raw_smp_processor_id(); | 
|---|
| 344 | if (!atomic_try_cmpxchg(v: &nmi_cpu, old: &old_cpu, new: this_cpu)) | 
|---|
| 345 | return NMI_HANDLED; | 
|---|
| 346 |  | 
|---|
| 347 | return NMI_DONE; | 
|---|
| 348 | } | 
|---|
| 349 | #endif | 
|---|
| 350 |  | 
|---|
| 351 | static unsigned long hv_get_tsc_khz(void) | 
|---|
| 352 | { | 
|---|
| 353 | unsigned long freq; | 
|---|
| 354 |  | 
|---|
| 355 | rdmsrq(HV_X64_MSR_TSC_FREQUENCY, freq); | 
|---|
| 356 |  | 
|---|
| 357 | return freq / 1000; | 
|---|
| 358 | } | 
|---|
| 359 |  | 
|---|
| 360 | #if defined(CONFIG_SMP) && IS_ENABLED(CONFIG_HYPERV) | 
|---|
| 361 | static void __init hv_smp_prepare_boot_cpu(void) | 
|---|
| 362 | { | 
|---|
| 363 | native_smp_prepare_boot_cpu(); | 
|---|
| 364 | #if defined(CONFIG_X86_64) && defined(CONFIG_PARAVIRT_SPINLOCKS) | 
|---|
| 365 | hv_init_spinlocks(); | 
|---|
| 366 | #endif | 
|---|
| 367 | } | 
|---|
| 368 |  | 
|---|
| 369 | static void __init hv_smp_prepare_cpus(unsigned int max_cpus) | 
|---|
| 370 | { | 
|---|
| 371 | #ifdef CONFIG_X86_64 | 
|---|
| 372 | int i; | 
|---|
| 373 | int ret; | 
|---|
| 374 | #endif | 
|---|
| 375 |  | 
|---|
| 376 | native_smp_prepare_cpus(max_cpus); | 
|---|
| 377 |  | 
|---|
| 378 | /* | 
|---|
| 379 | *  Override wakeup_secondary_cpu_64 callback for SEV-SNP | 
|---|
| 380 | *  enlightened guest. | 
|---|
| 381 | */ | 
|---|
| 382 | if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) { | 
|---|
| 383 | apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap; | 
|---|
| 384 | return; | 
|---|
| 385 | } | 
|---|
| 386 |  | 
|---|
| 387 | #ifdef CONFIG_X86_64 | 
|---|
| 388 | for_each_present_cpu(i) { | 
|---|
| 389 | if (i == 0) | 
|---|
| 390 | continue; | 
|---|
| 391 | ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i)); | 
|---|
| 392 | BUG_ON(ret); | 
|---|
| 393 | } | 
|---|
| 394 |  | 
|---|
| 395 | for_each_present_cpu(i) { | 
|---|
| 396 | if (i == 0) | 
|---|
| 397 | continue; | 
|---|
| 398 | ret = hv_call_create_vp(numa_cpu_node(i), hv_current_partition_id, i, i); | 
|---|
| 399 | BUG_ON(ret); | 
|---|
| 400 | } | 
|---|
| 401 | #endif | 
|---|
| 402 | } | 
|---|
| 403 | #endif | 
|---|
| 404 |  | 
|---|
| 405 | /* | 
|---|
| 406 | * When a fully enlightened TDX VM runs on Hyper-V, the firmware sets the | 
|---|
| 407 | * HW_REDUCED flag: refer to acpi_tb_create_local_fadt(). Consequently ttyS0 | 
|---|
| 408 | * interrupts can't work because request_irq() -> ... -> irq_to_desc() returns | 
|---|
| 409 | * NULL for ttyS0. This happens because mp_config_acpi_legacy_irqs() sees a | 
|---|
| 410 | * nr_legacy_irqs() of 0, so it doesn't initialize the array 'mp_irqs[]', and | 
|---|
| 411 | * later setup_IO_APIC_irqs() -> find_irq_entry() fails to find the legacy irqs | 
|---|
| 412 | * from the array and hence doesn't create the necessary irq description info. | 
|---|
| 413 | * | 
|---|
| 414 | * Clone arch/x86/kernel/acpi/boot.c: acpi_generic_reduced_hw_init() here, | 
|---|
| 415 | * except don't change 'legacy_pic', which keeps its default value | 
|---|
| 416 | * 'default_legacy_pic'. This way, mp_config_acpi_legacy_irqs() sees a non-zero | 
|---|
| 417 | * nr_legacy_irqs() and eventually serial console interrupts works properly. | 
|---|
| 418 | */ | 
|---|
| 419 | static void __init reduced_hw_init(void) | 
|---|
| 420 | { | 
|---|
| 421 | x86_init.timers.timer_init	= x86_init_noop; | 
|---|
| 422 | x86_init.irqs.pre_vector_init	= x86_init_noop; | 
|---|
| 423 | } | 
|---|
| 424 |  | 
|---|
| 425 | int hv_get_hypervisor_version(union hv_hypervisor_version_info *info) | 
|---|
| 426 | { | 
|---|
| 427 | unsigned int hv_max_functions; | 
|---|
| 428 |  | 
|---|
| 429 | hv_max_functions = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS); | 
|---|
| 430 | if (hv_max_functions < HYPERV_CPUID_VERSION) { | 
|---|
| 431 | pr_err( "%s: Could not detect Hyper-V version\n", __func__); | 
|---|
| 432 | return -ENODEV; | 
|---|
| 433 | } | 
|---|
| 434 |  | 
|---|
| 435 | cpuid(HYPERV_CPUID_VERSION, eax: &info->eax, ebx: &info->ebx, ecx: &info->ecx, edx: &info->edx); | 
|---|
| 436 |  | 
|---|
| 437 | return 0; | 
|---|
| 438 | } | 
|---|
| 439 | EXPORT_SYMBOL_GPL(hv_get_hypervisor_version); | 
|---|
| 440 |  | 
|---|
| 441 | static void __init ms_hyperv_init_platform(void) | 
|---|
| 442 | { | 
|---|
| 443 | int hv_max_functions_eax; | 
|---|
| 444 |  | 
|---|
| 445 | #ifdef CONFIG_PARAVIRT | 
|---|
| 446 | pv_info.name = "Hyper-V"; | 
|---|
| 447 | #endif | 
|---|
| 448 |  | 
|---|
| 449 | /* | 
|---|
| 450 | * Extract the features and hints | 
|---|
| 451 | */ | 
|---|
| 452 | ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); | 
|---|
| 453 | ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES); | 
|---|
| 454 | ms_hyperv.ext_features = cpuid_ecx(HYPERV_CPUID_FEATURES); | 
|---|
| 455 | ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); | 
|---|
| 456 | ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); | 
|---|
| 457 |  | 
|---|
| 458 | hv_max_functions_eax = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS); | 
|---|
| 459 |  | 
|---|
| 460 | pr_info( "Hyper-V: privilege flags low %#x, high %#x, ext %#x, hints %#x, misc %#x\n", | 
|---|
| 461 | ms_hyperv.features, ms_hyperv.priv_high, | 
|---|
| 462 | ms_hyperv.ext_features, ms_hyperv.hints, | 
|---|
| 463 | ms_hyperv.misc_features); | 
|---|
| 464 |  | 
|---|
| 465 | ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); | 
|---|
| 466 | ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); | 
|---|
| 467 |  | 
|---|
| 468 | pr_debug( "Hyper-V: max %u virtual processors, %u logical processors\n", | 
|---|
| 469 | ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); | 
|---|
| 470 |  | 
|---|
| 471 | hv_identify_partition_type(); | 
|---|
| 472 |  | 
|---|
| 473 | if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) { | 
|---|
| 474 | hv_nested = true; | 
|---|
| 475 | pr_info( "Hyper-V: running on a nested hypervisor\n"); | 
|---|
| 476 | } | 
|---|
| 477 |  | 
|---|
| 478 | if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && | 
|---|
| 479 | ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { | 
|---|
| 480 | x86_platform.calibrate_tsc = hv_get_tsc_khz; | 
|---|
| 481 | x86_platform.calibrate_cpu = hv_get_tsc_khz; | 
|---|
| 482 | setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); | 
|---|
| 483 | } | 
|---|
| 484 |  | 
|---|
| 485 | if (ms_hyperv.priv_high & HV_ISOLATION) { | 
|---|
| 486 | ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG); | 
|---|
| 487 | ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG); | 
|---|
| 488 |  | 
|---|
| 489 | if (ms_hyperv.shared_gpa_boundary_active) | 
|---|
| 490 | ms_hyperv.shared_gpa_boundary = | 
|---|
| 491 | BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); | 
|---|
| 492 |  | 
|---|
| 493 | pr_info( "Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", | 
|---|
| 494 | ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); | 
|---|
| 495 |  | 
|---|
| 496 |  | 
|---|
| 497 | if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { | 
|---|
| 498 | static_branch_enable(&isolation_type_snp); | 
|---|
| 499 | if (!ms_hyperv.paravisor_present) | 
|---|
| 500 | hypercall_update(hv_snp_hypercall); | 
|---|
| 501 | } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { | 
|---|
| 502 | static_branch_enable(&isolation_type_tdx); | 
|---|
| 503 |  | 
|---|
| 504 | /* A TDX VM must use x2APIC and doesn't use lazy EOI. */ | 
|---|
| 505 | ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; | 
|---|
| 506 |  | 
|---|
| 507 | if (!ms_hyperv.paravisor_present) { | 
|---|
| 508 | hypercall_update(hv_tdx_hypercall); | 
|---|
| 509 | /* | 
|---|
| 510 | * Mark the Hyper-V TSC page feature as disabled | 
|---|
| 511 | * in a TDX VM without paravisor so that the | 
|---|
| 512 | * Invariant TSC, which is a better clocksource | 
|---|
| 513 | * anyway, is used instead. | 
|---|
| 514 | */ | 
|---|
| 515 | ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; | 
|---|
| 516 |  | 
|---|
| 517 | /* | 
|---|
| 518 | * The Invariant TSC is expected to be available | 
|---|
| 519 | * in a TDX VM without paravisor, but if not, | 
|---|
| 520 | * print a warning message. The slower Hyper-V MSR-based | 
|---|
| 521 | * Ref Counter should end up being the clocksource. | 
|---|
| 522 | */ | 
|---|
| 523 | if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) | 
|---|
| 524 | pr_warn( "Hyper-V: Invariant TSC is unavailable\n"); | 
|---|
| 525 |  | 
|---|
| 526 | /* HV_MSR_CRASH_CTL is unsupported. */ | 
|---|
| 527 | ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; | 
|---|
| 528 |  | 
|---|
| 529 | /* Don't trust Hyper-V's TLB-flushing hypercalls. */ | 
|---|
| 530 | ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; | 
|---|
| 531 |  | 
|---|
| 532 | x86_init.acpi.reduced_hw_early_init = reduced_hw_init; | 
|---|
| 533 | } | 
|---|
| 534 | } | 
|---|
| 535 | } | 
|---|
| 536 |  | 
|---|
| 537 | if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { | 
|---|
| 538 | ms_hyperv.nested_features = | 
|---|
| 539 | cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); | 
|---|
| 540 | pr_info( "Hyper-V: Nested features: 0x%x\n", | 
|---|
| 541 | ms_hyperv.nested_features); | 
|---|
| 542 | } | 
|---|
| 543 |  | 
|---|
| 544 | #ifdef CONFIG_X86_LOCAL_APIC | 
|---|
| 545 | if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && | 
|---|
| 546 | ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { | 
|---|
| 547 | /* | 
|---|
| 548 | * Get the APIC frequency. | 
|---|
| 549 | */ | 
|---|
| 550 | u64	hv_lapic_frequency; | 
|---|
| 551 |  | 
|---|
| 552 | rdmsrq(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); | 
|---|
| 553 | hv_lapic_frequency = div_u64(dividend: hv_lapic_frequency, HZ); | 
|---|
| 554 | lapic_timer_period = hv_lapic_frequency; | 
|---|
| 555 | pr_info( "Hyper-V: LAPIC Timer Frequency: %#x\n", | 
|---|
| 556 | lapic_timer_period); | 
|---|
| 557 | } | 
|---|
| 558 |  | 
|---|
| 559 | register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST, | 
|---|
| 560 | "hv_nmi_unknown"); | 
|---|
| 561 | #endif | 
|---|
| 562 |  | 
|---|
| 563 | #ifdef CONFIG_X86_IO_APIC | 
|---|
| 564 | no_timer_check = 1; | 
|---|
| 565 | #endif | 
|---|
| 566 |  | 
|---|
| 567 | #if IS_ENABLED(CONFIG_HYPERV) | 
|---|
| 568 | #if defined(CONFIG_KEXEC_CORE) | 
|---|
| 569 | machine_ops.shutdown = hv_machine_shutdown; | 
|---|
| 570 | #endif | 
|---|
| 571 | #if defined(CONFIG_CRASH_DUMP) | 
|---|
| 572 | machine_ops.crash_shutdown = hv_machine_crash_shutdown; | 
|---|
| 573 | #endif | 
|---|
| 574 | #endif | 
|---|
| 575 | /* | 
|---|
| 576 | * HV_ACCESS_TSC_INVARIANT is always zero for the root partition. Root | 
|---|
| 577 | * partition doesn't need to write to synthetic MSR to enable invariant | 
|---|
| 578 | * TSC feature. It sees what the hardware provides. | 
|---|
| 579 | */ | 
|---|
| 580 | if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { | 
|---|
| 581 | /* | 
|---|
| 582 | * Writing to synthetic MSR 0x40000118 updates/changes the | 
|---|
| 583 | * guest visible CPUIDs. Setting bit 0 of this MSR  enables | 
|---|
| 584 | * guests to report invariant TSC feature through CPUID | 
|---|
| 585 | * instruction, CPUID 0x800000007/EDX, bit 8. See code in | 
|---|
| 586 | * early_init_intel() where this bit is examined. The | 
|---|
| 587 | * setting of this MSR bit should happen before init_intel() | 
|---|
| 588 | * is called. | 
|---|
| 589 | */ | 
|---|
| 590 | wrmsrq(HV_X64_MSR_TSC_INVARIANT_CONTROL, HV_EXPOSE_INVARIANT_TSC); | 
|---|
| 591 | setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); | 
|---|
| 592 | } | 
|---|
| 593 |  | 
|---|
| 594 | /* | 
|---|
| 595 | * Generation 2 instances don't support reading the NMI status from | 
|---|
| 596 | * 0x61 port. | 
|---|
| 597 | */ | 
|---|
| 598 | if (efi_enabled(EFI_BOOT)) | 
|---|
| 599 | x86_platform.get_nmi_reason = hv_get_nmi_reason; | 
|---|
| 600 |  | 
|---|
| 601 | #if IS_ENABLED(CONFIG_HYPERV) | 
|---|
| 602 | if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || | 
|---|
| 603 | ms_hyperv.paravisor_present) | 
|---|
| 604 | hv_vtom_init(); | 
|---|
| 605 | /* | 
|---|
| 606 | * Setup the hook to get control post apic initialization. | 
|---|
| 607 | */ | 
|---|
| 608 | x86_platform.apic_post_init = hyperv_init; | 
|---|
| 609 | hyperv_setup_mmu_ops(); | 
|---|
| 610 |  | 
|---|
| 611 | /* Install system interrupt handler for hypervisor callback */ | 
|---|
| 612 | sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); | 
|---|
| 613 |  | 
|---|
| 614 | /* Install system interrupt handler for reenlightenment notifications */ | 
|---|
| 615 | if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) { | 
|---|
| 616 | sysvec_install(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); | 
|---|
| 617 | } | 
|---|
| 618 |  | 
|---|
| 619 | /* Install system interrupt handler for stimer0 */ | 
|---|
| 620 | if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) { | 
|---|
| 621 | sysvec_install(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); | 
|---|
| 622 | } | 
|---|
| 623 |  | 
|---|
| 624 | # ifdef CONFIG_SMP | 
|---|
| 625 | smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; | 
|---|
| 626 | if (hv_root_partition() || | 
|---|
| 627 | (!ms_hyperv.paravisor_present && hv_isolation_type_snp())) | 
|---|
| 628 | smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus; | 
|---|
| 629 | # endif | 
|---|
| 630 |  | 
|---|
| 631 | /* | 
|---|
| 632 | * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic, | 
|---|
| 633 | * set x2apic destination mode to physical mode when x2apic is available | 
|---|
| 634 | * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs | 
|---|
| 635 | * have 8-bit APIC id. | 
|---|
| 636 | */ | 
|---|
| 637 | # ifdef CONFIG_X86_X2APIC | 
|---|
| 638 | if (x2apic_supported()) | 
|---|
| 639 | x2apic_phys = 1; | 
|---|
| 640 | # endif | 
|---|
| 641 |  | 
|---|
| 642 | /* Register Hyper-V specific clocksource */ | 
|---|
| 643 | hv_init_clocksource(); | 
|---|
| 644 | x86_setup_ops_for_tsc_pg_clock(); | 
|---|
| 645 | hv_vtl_init_platform(); | 
|---|
| 646 | #endif | 
|---|
| 647 | /* | 
|---|
| 648 | * TSC should be marked as unstable only after Hyper-V | 
|---|
| 649 | * clocksource has been initialized. This ensures that the | 
|---|
| 650 | * stability of the sched_clock is not altered. | 
|---|
| 651 | * | 
|---|
| 652 | * HV_ACCESS_TSC_INVARIANT is always zero for the root partition. No | 
|---|
| 653 | * need to check for it. | 
|---|
| 654 | */ | 
|---|
| 655 | if (!hv_root_partition() && | 
|---|
| 656 | !(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) | 
|---|
| 657 | mark_tsc_unstable(reason: "running on Hyper-V"); | 
|---|
| 658 |  | 
|---|
| 659 | hardlockup_detector_disable(); | 
|---|
| 660 | } | 
|---|
| 661 |  | 
|---|
| 662 | static bool __init ms_hyperv_x2apic_available(void) | 
|---|
| 663 | { | 
|---|
| 664 | return x2apic_supported(); | 
|---|
| 665 | } | 
|---|
| 666 |  | 
|---|
| 667 | /* | 
|---|
| 668 | * If ms_hyperv_msi_ext_dest_id() returns true, hyperv_prepare_irq_remapping() | 
|---|
| 669 | * returns -ENODEV and the Hyper-V IOMMU driver is not used; instead, the | 
|---|
| 670 | * generic support of the 15-bit APIC ID is used: see __irq_msi_compose_msg(). | 
|---|
| 671 | * | 
|---|
| 672 | * Note: for a VM on Hyper-V, the I/O-APIC is the only device which | 
|---|
| 673 | * (logically) generates MSIs directly to the system APIC irq domain. | 
|---|
| 674 | * There is no HPET, and PCI MSI/MSI-X interrupts are remapped by the | 
|---|
| 675 | * pci-hyperv host bridge. | 
|---|
| 676 | * | 
|---|
| 677 | * Note: for a Hyper-V root partition, this will always return false. | 
|---|
| 678 | * The hypervisor doesn't expose these HYPERV_CPUID_VIRT_STACK_* cpuids by | 
|---|
| 679 | * default, they are implemented as intercepts by the Windows Hyper-V stack. | 
|---|
| 680 | * Even a nested root partition (L2 root) will not get them because the | 
|---|
| 681 | * nested (L1) hypervisor filters them out. | 
|---|
| 682 | */ | 
|---|
| 683 | static bool __init ms_hyperv_msi_ext_dest_id(void) | 
|---|
| 684 | { | 
|---|
| 685 | u32 eax; | 
|---|
| 686 |  | 
|---|
| 687 | eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_INTERFACE); | 
|---|
| 688 | if (eax != HYPERV_VS_INTERFACE_EAX_SIGNATURE) | 
|---|
| 689 | return false; | 
|---|
| 690 |  | 
|---|
| 691 | eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_PROPERTIES); | 
|---|
| 692 | return eax & HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE; | 
|---|
| 693 | } | 
|---|
| 694 |  | 
|---|
| 695 | #ifdef CONFIG_AMD_MEM_ENCRYPT | 
|---|
| 696 | static void hv_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs) | 
|---|
| 697 | { | 
|---|
| 698 | /* RAX and CPL are already in the GHCB */ | 
|---|
| 699 | ghcb_set_rcx(ghcb, regs->cx); | 
|---|
| 700 | ghcb_set_rdx(ghcb, regs->dx); | 
|---|
| 701 | ghcb_set_r8(ghcb, regs->r8); | 
|---|
| 702 | } | 
|---|
| 703 |  | 
|---|
| 704 | static bool hv_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs) | 
|---|
| 705 | { | 
|---|
| 706 | /* No checking of the return state needed */ | 
|---|
| 707 | return true; | 
|---|
| 708 | } | 
|---|
| 709 | #endif | 
|---|
| 710 |  | 
|---|
| 711 | const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | 
|---|
| 712 | .name			= "Microsoft Hyper-V", | 
|---|
| 713 | .detect			= ms_hyperv_platform, | 
|---|
| 714 | .type			= X86_HYPER_MS_HYPERV, | 
|---|
| 715 | .init.x2apic_available	= ms_hyperv_x2apic_available, | 
|---|
| 716 | .init.msi_ext_dest_id	= ms_hyperv_msi_ext_dest_id, | 
|---|
| 717 | .init.init_platform	= ms_hyperv_init_platform, | 
|---|
| 718 | .init.guest_late_init	= ms_hyperv_late_init, | 
|---|
| 719 | #ifdef CONFIG_AMD_MEM_ENCRYPT | 
|---|
| 720 | .runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare, | 
|---|
| 721 | .runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish, | 
|---|
| 722 | #endif | 
|---|
| 723 | }; | 
|---|
| 724 |  | 
|---|