| 1 | // SPDX-License-Identifier: GPL-2.0 | 
|---|
| 2 | /* | 
|---|
| 3 | * Intel Performance and Energy Bias Hint support. | 
|---|
| 4 | * | 
|---|
| 5 | * Copyright (C) 2019 Intel Corporation | 
|---|
| 6 | * | 
|---|
| 7 | * Author: | 
|---|
| 8 | *	Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 
|---|
| 9 | */ | 
|---|
| 10 |  | 
|---|
| 11 | #include <linux/cpuhotplug.h> | 
|---|
| 12 | #include <linux/cpu.h> | 
|---|
| 13 | #include <linux/device.h> | 
|---|
| 14 | #include <linux/kernel.h> | 
|---|
| 15 | #include <linux/string.h> | 
|---|
| 16 | #include <linux/syscore_ops.h> | 
|---|
| 17 | #include <linux/pm.h> | 
|---|
| 18 |  | 
|---|
| 19 | #include <asm/cpu_device_id.h> | 
|---|
| 20 | #include <asm/cpufeature.h> | 
|---|
| 21 | #include <asm/msr.h> | 
|---|
| 22 |  | 
|---|
| 23 | /** | 
|---|
| 24 | * DOC: overview | 
|---|
| 25 | * | 
|---|
| 26 | * The Performance and Energy Bias Hint (EPB) allows software to specify its | 
|---|
| 27 | * preference with respect to the power-performance tradeoffs present in the | 
|---|
| 28 | * processor.  Generally, the EPB is expected to be set by user space (directly | 
|---|
| 29 | * via sysfs or with the help of the x86_energy_perf_policy tool), but there are | 
|---|
| 30 | * two reasons for the kernel to update it. | 
|---|
| 31 | * | 
|---|
| 32 | * First, there are systems where the platform firmware resets the EPB during | 
|---|
| 33 | * system-wide transitions from sleep states back into the working state | 
|---|
| 34 | * effectively causing the previous EPB updates by user space to be lost. | 
|---|
| 35 | * Thus the kernel needs to save the current EPB values for all CPUs during | 
|---|
| 36 | * system-wide transitions to sleep states and restore them on the way back to | 
|---|
| 37 | * the working state.  That can be achieved by saving EPB for secondary CPUs | 
|---|
| 38 | * when they are taken offline during transitions into system sleep states and | 
|---|
| 39 | * for the boot CPU in a syscore suspend operation, so that it can be restored | 
|---|
| 40 | * for the boot CPU in a syscore resume operation and for the other CPUs when | 
|---|
| 41 | * they are brought back online.  However, CPUs that are already offline when | 
|---|
| 42 | * a system-wide PM transition is started are not taken offline again, but their | 
|---|
| 43 | * EPB values may still be reset by the platform firmware during the transition, | 
|---|
| 44 | * so in fact it is necessary to save the EPB of any CPU taken offline and to | 
|---|
| 45 | * restore it when the given CPU goes back online at all times. | 
|---|
| 46 | * | 
|---|
| 47 | * Second, on many systems the initial EPB value coming from the platform | 
|---|
| 48 | * firmware is 0 ('performance') and at least on some of them that is because | 
|---|
| 49 | * the platform firmware does not initialize EPB at all with the assumption that | 
|---|
| 50 | * the OS will do that anyway.  That sometimes is problematic, as it may cause | 
|---|
| 51 | * the system battery to drain too fast, for example, so it is better to adjust | 
|---|
| 52 | * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the | 
|---|
| 53 | * kernel changes it to 6 ('normal'). | 
|---|
| 54 | */ | 
|---|
| 55 |  | 
|---|
/*
 * Per-CPU copy of the EPB value captured by intel_epb_save().  It stays 0
 * until the first save for the CPU, which intel_epb_restore() relies on to
 * tell "never saved" apart from a saved EPB value of 0.
 */
static DEFINE_PER_CPU(u8, saved_epb);

/* Bits of MSR_IA32_ENERGY_PERF_BIAS that this file reads and updates. */
#define EPB_MASK	0x0fULL
/* Marker OR-ed into saved_epb so that a saved value is never 0. */
#define EPB_SAVED	(1ULL << 4)
/* Largest numeric value accepted by the energy_perf_bias sysfs attribute. */
#define MAX_EPB		EPB_MASK
|---|
| 61 |  | 
|---|
/*
 * Symbolic EPB settings.  The enumerators are used as indices into the
 * energ_perf_values[] and energy_perf_strings[] tables.
 */
enum energy_perf_value_index {
	EPB_INDEX_PERFORMANCE = 0,
	EPB_INDEX_BALANCE_PERFORMANCE,
	EPB_INDEX_NORMAL,
	EPB_INDEX_BALANCE_POWERSAVE,
	EPB_INDEX_POWERSAVE,
};
|---|
| 69 |  | 
|---|
| 70 | static u8 energ_perf_values[] = { | 
|---|
| 71 | [EPB_INDEX_PERFORMANCE] = ENERGY_PERF_BIAS_PERFORMANCE, | 
|---|
| 72 | [EPB_INDEX_BALANCE_PERFORMANCE] = ENERGY_PERF_BIAS_BALANCE_PERFORMANCE, | 
|---|
| 73 | [EPB_INDEX_NORMAL] = ENERGY_PERF_BIAS_NORMAL, | 
|---|
| 74 | [EPB_INDEX_BALANCE_POWERSAVE] = ENERGY_PERF_BIAS_BALANCE_POWERSAVE, | 
|---|
| 75 | [EPB_INDEX_POWERSAVE] = ENERGY_PERF_BIAS_POWERSAVE, | 
|---|
| 76 | }; | 
|---|
| 77 |  | 
|---|
| 78 | static int intel_epb_save(void) | 
|---|
| 79 | { | 
|---|
| 80 | u64 epb; | 
|---|
| 81 |  | 
|---|
| 82 | rdmsrq(MSR_IA32_ENERGY_PERF_BIAS, epb); | 
|---|
| 83 | /* | 
|---|
| 84 | * Ensure that saved_epb will always be nonzero after this write even if | 
|---|
| 85 | * the EPB value read from the MSR is 0. | 
|---|
| 86 | */ | 
|---|
| 87 | this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED); | 
|---|
| 88 |  | 
|---|
| 89 | return 0; | 
|---|
| 90 | } | 
|---|
| 91 |  | 
|---|
| 92 | static void intel_epb_restore(void) | 
|---|
| 93 | { | 
|---|
| 94 | u64 val = this_cpu_read(saved_epb); | 
|---|
| 95 | u64 epb; | 
|---|
| 96 |  | 
|---|
| 97 | rdmsrq(MSR_IA32_ENERGY_PERF_BIAS, epb); | 
|---|
| 98 | if (val) { | 
|---|
| 99 | val &= EPB_MASK; | 
|---|
| 100 | } else { | 
|---|
| 101 | /* | 
|---|
| 102 | * Because intel_epb_save() has not run for the current CPU yet, | 
|---|
| 103 | * it is going online for the first time, so if its EPB value is | 
|---|
| 104 | * 0 ('performance') at this point, assume that it has not been | 
|---|
| 105 | * initialized by the platform firmware and set it to 6 | 
|---|
| 106 | * ('normal'). | 
|---|
| 107 | */ | 
|---|
| 108 | val = epb & EPB_MASK; | 
|---|
| 109 | if (val == ENERGY_PERF_BIAS_PERFORMANCE) { | 
|---|
| 110 | val = energ_perf_values[EPB_INDEX_NORMAL]; | 
|---|
| 111 | pr_warn_once( "ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); | 
|---|
| 112 | } | 
|---|
| 113 | } | 
|---|
| 114 | wrmsrq(MSR_IA32_ENERGY_PERF_BIAS, val: (epb & ~EPB_MASK) | val); | 
|---|
| 115 | } | 
|---|
| 116 |  | 
|---|
| 117 | static struct syscore_ops intel_epb_syscore_ops = { | 
|---|
| 118 | .suspend = intel_epb_save, | 
|---|
| 119 | .resume = intel_epb_restore, | 
|---|
| 120 | }; | 
|---|
| 121 |  | 
|---|
| 122 | static const char * const energy_perf_strings[] = { | 
|---|
| 123 | [EPB_INDEX_PERFORMANCE] = "performance", | 
|---|
| 124 | [EPB_INDEX_BALANCE_PERFORMANCE] = "balance-performance", | 
|---|
| 125 | [EPB_INDEX_NORMAL] = "normal", | 
|---|
| 126 | [EPB_INDEX_BALANCE_POWERSAVE] = "balance-power", | 
|---|
| 127 | [EPB_INDEX_POWERSAVE] = "power", | 
|---|
| 128 | }; | 
|---|
| 129 |  | 
|---|
| 130 | static ssize_t energy_perf_bias_show(struct device *dev, | 
|---|
| 131 | struct device_attribute *attr, | 
|---|
| 132 | char *buf) | 
|---|
| 133 | { | 
|---|
| 134 | unsigned int cpu = dev->id; | 
|---|
| 135 | u64 epb; | 
|---|
| 136 | int ret; | 
|---|
| 137 |  | 
|---|
| 138 | ret = rdmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, q: &epb); | 
|---|
| 139 | if (ret < 0) | 
|---|
| 140 | return ret; | 
|---|
| 141 |  | 
|---|
| 142 | return sprintf(buf, fmt: "%llu\n", epb); | 
|---|
| 143 | } | 
|---|
| 144 |  | 
|---|
| 145 | static ssize_t energy_perf_bias_store(struct device *dev, | 
|---|
| 146 | struct device_attribute *attr, | 
|---|
| 147 | const char *buf, size_t count) | 
|---|
| 148 | { | 
|---|
| 149 | unsigned int cpu = dev->id; | 
|---|
| 150 | u64 epb, val; | 
|---|
| 151 | int ret; | 
|---|
| 152 |  | 
|---|
| 153 | ret = __sysfs_match_string(array: energy_perf_strings, | 
|---|
| 154 | ARRAY_SIZE(energy_perf_strings), s: buf); | 
|---|
| 155 | if (ret >= 0) | 
|---|
| 156 | val = energ_perf_values[ret]; | 
|---|
| 157 | else if (kstrtou64(s: buf, base: 0, res: &val) || val > MAX_EPB) | 
|---|
| 158 | return -EINVAL; | 
|---|
| 159 |  | 
|---|
| 160 | ret = rdmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, q: &epb); | 
|---|
| 161 | if (ret < 0) | 
|---|
| 162 | return ret; | 
|---|
| 163 |  | 
|---|
| 164 | ret = wrmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, | 
|---|
| 165 | q: (epb & ~EPB_MASK) | val); | 
|---|
| 166 | if (ret < 0) | 
|---|
| 167 | return ret; | 
|---|
| 168 |  | 
|---|
| 169 | return count; | 
|---|
| 170 | } | 
|---|
| 171 |  | 
|---|
| 172 | static DEVICE_ATTR_RW(energy_perf_bias); | 
|---|
| 173 |  | 
|---|
| 174 | static struct attribute *intel_epb_attrs[] = { | 
|---|
| 175 | &dev_attr_energy_perf_bias.attr, | 
|---|
| 176 | NULL | 
|---|
| 177 | }; | 
|---|
| 178 |  | 
|---|
| 179 | static const struct attribute_group intel_epb_attr_group = { | 
|---|
| 180 | .name = power_group_name, | 
|---|
| 181 | .attrs =  intel_epb_attrs | 
|---|
| 182 | }; | 
|---|
| 183 |  | 
|---|
| 184 | static int intel_epb_online(unsigned int cpu) | 
|---|
| 185 | { | 
|---|
| 186 | struct device *cpu_dev = get_cpu_device(cpu); | 
|---|
| 187 |  | 
|---|
| 188 | intel_epb_restore(); | 
|---|
| 189 | if (!cpuhp_tasks_frozen) | 
|---|
| 190 | sysfs_merge_group(kobj: &cpu_dev->kobj, grp: &intel_epb_attr_group); | 
|---|
| 191 |  | 
|---|
| 192 | return 0; | 
|---|
| 193 | } | 
|---|
| 194 |  | 
|---|
| 195 | static int intel_epb_offline(unsigned int cpu) | 
|---|
| 196 | { | 
|---|
| 197 | struct device *cpu_dev = get_cpu_device(cpu); | 
|---|
| 198 |  | 
|---|
| 199 | if (!cpuhp_tasks_frozen) | 
|---|
| 200 | sysfs_unmerge_group(kobj: &cpu_dev->kobj, grp: &intel_epb_attr_group); | 
|---|
| 201 |  | 
|---|
| 202 | intel_epb_save(); | 
|---|
| 203 | return 0; | 
|---|
| 204 | } | 
|---|
| 205 |  | 
|---|
| 206 | static const struct x86_cpu_id intel_epb_normal[] = { | 
|---|
| 207 | X86_MATCH_VFM(INTEL_ALDERLAKE_L, | 
|---|
| 208 | ENERGY_PERF_BIAS_NORMAL_POWERSAVE), | 
|---|
| 209 | X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, | 
|---|
| 210 | ENERGY_PERF_BIAS_NORMAL_POWERSAVE), | 
|---|
| 211 | X86_MATCH_VFM(INTEL_RAPTORLAKE_P, | 
|---|
| 212 | ENERGY_PERF_BIAS_NORMAL_POWERSAVE), | 
|---|
| 213 | {} | 
|---|
| 214 | }; | 
|---|
| 215 |  | 
|---|
| 216 | static __init int intel_epb_init(void) | 
|---|
| 217 | { | 
|---|
| 218 | const struct x86_cpu_id *id = x86_match_cpu(match: intel_epb_normal); | 
|---|
| 219 | int ret; | 
|---|
| 220 |  | 
|---|
| 221 | if (!boot_cpu_has(X86_FEATURE_EPB)) | 
|---|
| 222 | return -ENODEV; | 
|---|
| 223 |  | 
|---|
| 224 | if (id) | 
|---|
| 225 | energ_perf_values[EPB_INDEX_NORMAL] = id->driver_data; | 
|---|
| 226 |  | 
|---|
| 227 | ret = cpuhp_setup_state(state: CPUHP_AP_X86_INTEL_EPB_ONLINE, | 
|---|
| 228 | name: "x86/intel/epb:online", startup: intel_epb_online, | 
|---|
| 229 | teardown: intel_epb_offline); | 
|---|
| 230 | if (ret < 0) | 
|---|
| 231 | goto err_out_online; | 
|---|
| 232 |  | 
|---|
| 233 | register_syscore_ops(ops: &intel_epb_syscore_ops); | 
|---|
| 234 | return 0; | 
|---|
| 235 |  | 
|---|
| 236 | err_out_online: | 
|---|
| 237 | cpuhp_remove_state(state: CPUHP_AP_X86_INTEL_EPB_ONLINE); | 
|---|
| 238 | return ret; | 
|---|
| 239 | } | 
|---|
| 240 | late_initcall(intel_epb_init); | 
|---|
| 241 |  | 
|---|