| 1 | // SPDX-License-Identifier: GPL-2.0 | 
|---|
| 2 | /* | 
|---|
| 3 | * Common corrected MCE threshold handler code: | 
|---|
| 4 | */ | 
|---|
| 5 | #include <linux/interrupt.h> | 
|---|
| 6 | #include <linux/kernel.h> | 
|---|
| 7 |  | 
|---|
| 8 | #include <asm/irq_vectors.h> | 
|---|
| 9 | #include <asm/traps.h> | 
|---|
| 10 | #include <asm/apic.h> | 
|---|
| 11 | #include <asm/mce.h> | 
|---|
| 12 | #include <asm/trace/irq_vectors.h> | 
|---|
| 13 |  | 
|---|
| 14 | #include "internal.h" | 
|---|
| 15 |  | 
|---|
| 16 | static void default_threshold_interrupt(void) | 
|---|
| 17 | { | 
|---|
| 18 | pr_err( "Unexpected threshold interrupt at vector %x\n", | 
|---|
| 19 | THRESHOLD_APIC_VECTOR); | 
|---|
| 20 | } | 
|---|
| 21 |  | 
|---|
/*
 * Handler that sysvec_threshold() calls for every threshold interrupt.
 * Starts out pointing at default_threshold_interrupt().
 */
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
|---|
| 23 |  | 
|---|
| 24 | DEFINE_IDTENTRY_SYSVEC(sysvec_threshold) | 
|---|
| 25 | { | 
|---|
| 26 | trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); | 
|---|
| 27 | inc_irq_stat(irq_threshold_count); | 
|---|
| 28 | mce_threshold_vector(); | 
|---|
| 29 | trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR); | 
|---|
| 30 | apic_eoi(); | 
|---|
| 31 | } | 
|---|
| 32 |  | 
|---|
/*
 * Per-CPU CMCI storm bookkeeping: per-bank state (banks[]), the number of
 * banks currently in storm mode (stormy_bank_count) and the poll_mode flag.
 */
DEFINE_PER_CPU(struct mca_storm_desc, storm_desc);
|---|
| 34 |  | 
|---|
| 35 | void mce_inherit_storm(unsigned int bank) | 
|---|
| 36 | { | 
|---|
| 37 | struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); | 
|---|
| 38 |  | 
|---|
| 39 | /* | 
|---|
| 40 | * Previous CPU owning this bank had put it into storm mode, | 
|---|
| 41 | * but the precise history of that storm is unknown. Assume | 
|---|
| 42 | * the worst (all recent polls of the bank found a valid error | 
|---|
| 43 | * logged). This will avoid the new owner prematurely declaring | 
|---|
| 44 | * the storm has ended. | 
|---|
| 45 | */ | 
|---|
| 46 | storm->banks[bank].history = ~0ull; | 
|---|
| 47 | storm->banks[bank].timestamp = jiffies; | 
|---|
| 48 | } | 
|---|
| 49 |  | 
|---|
| 50 | bool mce_get_storm_mode(void) | 
|---|
| 51 | { | 
|---|
| 52 | return __this_cpu_read(storm_desc.poll_mode); | 
|---|
| 53 | } | 
|---|
| 54 |  | 
|---|
| 55 | void mce_set_storm_mode(bool storm) | 
|---|
| 56 | { | 
|---|
| 57 | __this_cpu_write(storm_desc.poll_mode, storm); | 
|---|
| 58 | } | 
|---|
| 59 |  | 
|---|
| 60 | static void mce_handle_storm(unsigned int bank, bool on) | 
|---|
| 61 | { | 
|---|
| 62 | switch (boot_cpu_data.x86_vendor) { | 
|---|
| 63 | case X86_VENDOR_INTEL: | 
|---|
| 64 | mce_intel_handle_storm(bank, on); | 
|---|
| 65 | break; | 
|---|
| 66 | } | 
|---|
| 67 | } | 
|---|
| 68 |  | 
|---|
| 69 | void cmci_storm_begin(unsigned int bank) | 
|---|
| 70 | { | 
|---|
| 71 | struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); | 
|---|
| 72 |  | 
|---|
| 73 | __set_bit(bank, this_cpu_ptr(mce_poll_banks)); | 
|---|
| 74 | storm->banks[bank].in_storm_mode = true; | 
|---|
| 75 |  | 
|---|
| 76 | /* | 
|---|
| 77 | * If this is the first bank on this CPU to enter storm mode | 
|---|
| 78 | * start polling. | 
|---|
| 79 | */ | 
|---|
| 80 | if (++storm->stormy_bank_count == 1) | 
|---|
| 81 | mce_timer_kick(storm: true); | 
|---|
| 82 | } | 
|---|
| 83 |  | 
|---|
| 84 | void cmci_storm_end(unsigned int bank) | 
|---|
| 85 | { | 
|---|
| 86 | struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); | 
|---|
| 87 |  | 
|---|
| 88 | __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); | 
|---|
| 89 | storm->banks[bank].history = 0; | 
|---|
| 90 | storm->banks[bank].in_storm_mode = false; | 
|---|
| 91 |  | 
|---|
| 92 | /* If no banks left in storm mode, stop polling. */ | 
|---|
| 93 | if (!--storm->stormy_bank_count) | 
|---|
| 94 | mce_timer_kick(storm: false); | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | void mce_track_storm(struct mce *mce) | 
|---|
| 98 | { | 
|---|
| 99 | struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); | 
|---|
| 100 | unsigned long now = jiffies, delta; | 
|---|
| 101 | unsigned int shift = 1; | 
|---|
| 102 | u64 history = 0; | 
|---|
| 103 |  | 
|---|
| 104 | /* No tracking needed for banks that do not support CMCI */ | 
|---|
| 105 | if (storm->banks[mce->bank].poll_only) | 
|---|
| 106 | return; | 
|---|
| 107 |  | 
|---|
| 108 | /* | 
|---|
| 109 | * When a bank is in storm mode it is polled once per second and | 
|---|
| 110 | * the history mask will record about the last minute of poll results. | 
|---|
| 111 | * If it is not in storm mode, then the bank is only checked when | 
|---|
| 112 | * there is a CMCI interrupt. Check how long it has been since | 
|---|
| 113 | * this bank was last checked, and adjust the amount of "shift" | 
|---|
| 114 | * to apply to history. | 
|---|
| 115 | */ | 
|---|
| 116 | if (!storm->banks[mce->bank].in_storm_mode) { | 
|---|
| 117 | delta = now - storm->banks[mce->bank].timestamp; | 
|---|
| 118 | shift = (delta + HZ) / HZ; | 
|---|
| 119 | } | 
|---|
| 120 |  | 
|---|
| 121 | /* If it has been a long time since the last poll, clear history. */ | 
|---|
| 122 | if (shift < NUM_HISTORY_BITS) | 
|---|
| 123 | history = storm->banks[mce->bank].history << shift; | 
|---|
| 124 |  | 
|---|
| 125 | storm->banks[mce->bank].timestamp = now; | 
|---|
| 126 |  | 
|---|
| 127 | /* History keeps track of corrected errors. VAL=1 && UC=0 */ | 
|---|
| 128 | if ((mce->status & MCI_STATUS_VAL) && mce_is_correctable(m: mce)) | 
|---|
| 129 | history |= 1; | 
|---|
| 130 |  | 
|---|
| 131 | storm->banks[mce->bank].history = history; | 
|---|
| 132 |  | 
|---|
| 133 | if (storm->banks[mce->bank].in_storm_mode) { | 
|---|
| 134 | if (history & GENMASK_ULL(STORM_END_POLL_THRESHOLD, 0)) | 
|---|
| 135 | return; | 
|---|
| 136 | printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm subsided\n", smp_processor_id(), mce->bank); | 
|---|
| 137 | mce_handle_storm(bank: mce->bank, on: false); | 
|---|
| 138 | cmci_storm_end(bank: mce->bank); | 
|---|
| 139 | } else { | 
|---|
| 140 | if (hweight64(history) < STORM_BEGIN_THRESHOLD) | 
|---|
| 141 | return; | 
|---|
| 142 | printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm detected\n", smp_processor_id(), mce->bank); | 
|---|
| 143 | mce_handle_storm(bank: mce->bank, on: true); | 
|---|
| 144 | cmci_storm_begin(bank: mce->bank); | 
|---|
| 145 | } | 
|---|
| 146 | } | 
|---|
| 147 |  | 
|---|