// SPDX-License-Identifier: GPL-2.0-only
/*
 * x86_pkg_temp_thermal driver
 * Copyright (c) 2013, Intel Corporation.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/intel_tcc.h>
#include <linux/err.h>
#include <linux/param.h>
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/pm.h>
#include <linux/thermal.h>
#include <linux/debugfs.h>

#include <asm/cpu_device_id.h>
#include <asm/msr.h>

#include "thermal_interrupt.h"

/*
 * Rate control delay: the idea is to introduce a debounce effect.
 * The delay should be long enough to avoid a flood of events while a
 * threshold is set to a temperature that is constantly violated, yet
 * short enough that user space can still take action, e.g. remove the
 * threshold or move it to the next interesting setting. Based on
 * experiments, roughly every 5 seconds under load gives a significant
 * temperature change.
 */
#define PKG_TEMP_THERMAL_NOTIFY_DELAY	5000
static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY;
module_param(notify_delay_ms, int, 0644);
MODULE_PARM_DESC(notify_delay_ms,
	"User space notification delay in milliseconds.");

/*
 * Number of trip points in the thermal zone. Currently it can't be more
 * than 2: the MSR allows setting and getting notifications for only two
 * thresholds. This define enforces that limit in case CPUID returns a
 * bogus number of thresholds.
 */
#define MAX_NUMBER_OF_TRIPS	2

struct zone_device {
	int				cpu;
	bool				work_scheduled;
	u32				msr_pkg_therm_low;
	u32				msr_pkg_therm_high;
	struct delayed_work		work;
	struct thermal_zone_device	*tzone;
	struct cpumask			cpumask;
};

static struct thermal_zone_params pkg_temp_tz_params = {
	.no_hwmon	= true,
};

/* Keep track of how many zone pointers we allocated in init() */
static int max_id __read_mostly;
/* Array of zone pointers */
static struct zone_device **zones;
/* Serializes interrupt notification, work and hotplug */
static DEFINE_RAW_SPINLOCK(pkg_temp_lock);
/* Protects zone operation in the work function against hotplug removal */
static DEFINE_MUTEX(thermal_zone_mutex);

/* The dynamically assigned cpu hotplug state for module_exit() */
static enum cpuhp_state pkg_thermal_hp_state __read_mostly;

/* Debug counters to show using debugfs */
static struct dentry *debugfs;
static unsigned int pkg_interrupt_cnt;
static unsigned int pkg_work_cnt;

static void pkg_temp_debugfs_init(void)
{
	debugfs = debugfs_create_dir("pkg_temp_thermal", NULL);

	debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs,
			   &pkg_interrupt_cnt);
	debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs,
			   &pkg_work_cnt);
}

/*
 * Protection:
 *
 * - cpu hotplug: Read serialized by cpu hotplug lock
 *		  Write must hold pkg_temp_lock
 *
 * - Other callsites: Must hold pkg_temp_lock
 */
static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
{
	int id = topology_logical_die_id(cpu);

	if (id >= 0 && id < max_id)
		return zones[id];
	return NULL;
}

static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
{
	struct zone_device *zonedev = thermal_zone_device_priv(tzd);
	int val, ret;

	ret = intel_tcc_get_temp(zonedev->cpu, &val, true);
	if (ret < 0)
		return ret;

	*temp = val * 1000;
	pr_debug("sys_get_curr_temp %d\n", *temp);
	return 0;
}

static int
sys_set_trip_temp(struct thermal_zone_device *tzd,
		  const struct thermal_trip *trip, int temp)
{
	struct zone_device *zonedev = thermal_zone_device_priv(tzd);
	unsigned int trip_index = THERMAL_TRIP_PRIV_TO_INT(trip->priv);
	u32 l, h, mask, shift, intr;
	int tj_max, val, ret;

	tj_max = intel_tcc_get_tjmax(zonedev->cpu);
	if (tj_max < 0)
		return tj_max;
	tj_max *= 1000;

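	/*
	 * The threshold fields in MSR_IA32_PACKAGE_THERM_INTERRUPT hold the
	 * trip point as a whole-degree offset below TjMax, so convert the
	 * millidegree value requested by user space accordingly.
	 */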
	val = (tj_max - temp)/1000;

	if (trip_index >= MAX_NUMBER_OF_TRIPS || val < 0 || val > 0x7f)
		return -EINVAL;

	ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
			   &l, &h);
	if (ret < 0)
		return ret;

	if (trip_index) {
		mask = THERM_MASK_THRESHOLD1;
		shift = THERM_SHIFT_THRESHOLD1;
		intr = THERM_INT_THRESHOLD1_ENABLE;
	} else {
		mask = THERM_MASK_THRESHOLD0;
		shift = THERM_SHIFT_THRESHOLD0;
		intr = THERM_INT_THRESHOLD0_ENABLE;
	}
	l &= ~mask;
	/*
	 * A trip temperature of 0 indicates that user space is no longer
	 * interested in receiving notifications for this threshold.
	 */
	if (!temp) {
		l &= ~intr;
	} else {
		l |= val << shift;
		l |= intr;
	}

	return wrmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
			    l, h);
}

/* Thermal zone callback registry */
static const struct thermal_zone_device_ops tzone_ops = {
	.get_temp = sys_get_curr_temp,
	.set_trip_temp = sys_set_trip_temp,
};

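/*
 * Returning true indicates that this driver rate-limits threshold
 * notifications itself (via notify_delay_ms and the delayed work), so the
 * thermal interrupt core does not need to apply its own rate control to
 * package threshold events.
 */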
static bool pkg_thermal_rate_control(void)
{
	return true;
}

/* Enable threshold interrupt on local package/cpu */
static inline void enable_pkg_thres_interrupt(void)
{
	u8 thres_0, thres_1;
	u32 l, h;

	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
	/* Only enable the interrupt for thresholds with a valid value programmed */
	thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0;
	thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1;
	if (thres_0)
		l |= THERM_INT_THRESHOLD0_ENABLE;
	if (thres_1)
		l |= THERM_INT_THRESHOLD1_ENABLE;
	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
}

/* Disable threshold interrupt on local package/cpu */
static inline void disable_pkg_thres_interrupt(void)
{
	u32 l, h;

	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);

	l &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
}

static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
{
	struct thermal_zone_device *tzone = NULL;
	int cpu = smp_processor_id();
	struct zone_device *zonedev;

	mutex_lock(&thermal_zone_mutex);
	raw_spin_lock_irq(&pkg_temp_lock);
	++pkg_work_cnt;

	zonedev = pkg_temp_thermal_get_dev(cpu);
	if (!zonedev) {
		raw_spin_unlock_irq(&pkg_temp_lock);
		mutex_unlock(&thermal_zone_mutex);
		return;
	}
	zonedev->work_scheduled = false;

	thermal_clear_package_intr_status(PACKAGE_LEVEL, THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
	tzone = zonedev->tzone;

	enable_pkg_thres_interrupt();
	raw_spin_unlock_irq(&pkg_temp_lock);

	/*
	 * If tzone is not NULL, then thermal_zone_mutex will prevent the
	 * concurrent removal in the cpu offline callback.
	 */
	if (tzone)
		thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);

	mutex_unlock(&thermal_zone_mutex);
}

static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
{
	unsigned long ms = msecs_to_jiffies(notify_delay_ms);

	schedule_delayed_work_on(cpu, work, ms);
}

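/*
 * Called from the package thermal interrupt handler: mask further threshold
 * interrupts and kick the delayed work, which notifies user space and
 * re-enables the interrupt after notify_delay_ms.
 */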
static int pkg_thermal_notify(u64 msr_val)
{
	int cpu = smp_processor_id();
	struct zone_device *zonedev;
	unsigned long flags;

	raw_spin_lock_irqsave(&pkg_temp_lock, flags);
	++pkg_interrupt_cnt;

	disable_pkg_thres_interrupt();

	/* Work is per package, so scheduling it once is enough. */
	zonedev = pkg_temp_thermal_get_dev(cpu);
	if (zonedev && !zonedev->work_scheduled) {
		zonedev->work_scheduled = true;
		pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
	}

	raw_spin_unlock_irqrestore(&pkg_temp_lock, flags);
	return 0;
}

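/*
 * Initialize the trip points from any threshold values already programmed
 * in the package thermal interrupt MSR. A hardware threshold of 0 means
 * "not set" and is exposed as THERMAL_TEMP_INVALID.
 */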
static int pkg_temp_thermal_trips_init(int cpu, int tj_max,
				       struct thermal_trip *trips, int num_trips)
{
	unsigned long thres_reg_value;
	u32 mask, shift, eax, edx;
	int ret, i;

	for (i = 0; i < num_trips; i++) {

		if (i) {
			mask = THERM_MASK_THRESHOLD1;
			shift = THERM_SHIFT_THRESHOLD1;
		} else {
			mask = THERM_MASK_THRESHOLD0;
			shift = THERM_SHIFT_THRESHOLD0;
		}

		ret = rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
				   &eax, &edx);
		if (ret < 0)
			return ret;

		thres_reg_value = (eax & mask) >> shift;

		trips[i].temperature = thres_reg_value ?
			tj_max - thres_reg_value * 1000 : THERMAL_TEMP_INVALID;

		trips[i].type = THERMAL_TRIP_PASSIVE;
		trips[i].flags |= THERMAL_TRIP_FLAG_RW_TEMP;
		trips[i].priv = THERMAL_INT_TO_TRIP_PRIV(i);

		pr_debug("%s: cpu=%d, trip=%d, temp=%d\n",
			 __func__, cpu, i, trips[i].temperature);
	}

	return 0;
}

static int pkg_temp_thermal_device_add(unsigned int cpu)
{
	struct thermal_trip trips[MAX_NUMBER_OF_TRIPS] = { 0 };
	int id = topology_logical_die_id(cpu);
	u32 eax, ebx, ecx, edx;
	struct zone_device *zonedev;
	int thres_count, err;
	int tj_max;

	if (id >= max_id)
		return -ENOMEM;

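	/*
	 * CPUID leaf 6 reports the number of programmable DTS interrupt
	 * thresholds in the low bits of EBX.
	 */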
	cpuid(6, &eax, &ebx, &ecx, &edx);
	thres_count = ebx & 0x07;
	if (!thres_count)
		return -ENODEV;

	thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);

	tj_max = intel_tcc_get_tjmax(cpu);
	if (tj_max < 0)
		return tj_max;
	tj_max *= 1000;

	zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
	if (!zonedev)
		return -ENOMEM;

	err = pkg_temp_thermal_trips_init(cpu, tj_max, trips, thres_count);
	if (err)
		goto out_kfree_zonedev;

	INIT_DELAYED_WORK(&zonedev->work, pkg_temp_thermal_threshold_work_fn);
	zonedev->cpu = cpu;
	zonedev->tzone = thermal_zone_device_register_with_trips("x86_pkg_temp",
			trips, thres_count,
			zonedev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
	if (IS_ERR(zonedev->tzone)) {
		err = PTR_ERR(zonedev->tzone);
		goto out_kfree_zonedev;
	}
	err = thermal_zone_device_enable(zonedev->tzone);
	if (err)
		goto out_unregister_tz;

	/* Store MSR value for package thermal interrupt, to restore at exit */
	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, zonedev->msr_pkg_therm_low,
	      zonedev->msr_pkg_therm_high);

	cpumask_set_cpu(cpu, &zonedev->cpumask);
	raw_spin_lock_irq(&pkg_temp_lock);
	zones[id] = zonedev;
	raw_spin_unlock_irq(&pkg_temp_lock);

	return 0;

out_unregister_tz:
	thermal_zone_device_unregister(zonedev->tzone);
out_kfree_zonedev:
	kfree(zonedev);
	return err;
}

static int pkg_thermal_cpu_offline(unsigned int cpu)
{
	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
	bool lastcpu, was_target;
	int target;

	if (!zonedev)
		return 0;

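	/* Pick any other CPU in this package to take over MSR access. */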
	target = cpumask_any_but(&zonedev->cpumask, cpu);
	cpumask_clear_cpu(cpu, &zonedev->cpumask);
	lastcpu = target >= nr_cpu_ids;
	/*
	 * If this is the last cpu in the package, remove the sysfs files
	 * before doing further cleanups.
	 */
	if (lastcpu) {
		struct thermal_zone_device *tzone = zonedev->tzone;

		/*
		 * We must protect against the work function calling
		 * thermal_zone_device_update() while/after the zone is
		 * unregistered. Null out the pointer under the zone mutex,
		 * so the work function won't use it.
		 */
		mutex_lock(&thermal_zone_mutex);
		zonedev->tzone = NULL;
		mutex_unlock(&thermal_zone_mutex);

		thermal_zone_device_unregister(tzone);
	}

	/* Protect against work and interrupts */
	raw_spin_lock_irq(&pkg_temp_lock);

	/*
	 * Check whether this cpu was the current target and store the new
	 * one. When we drop the lock, then the interrupt notify function
	 * will see the new target.
	 */
	was_target = zonedev->cpu == cpu;
	zonedev->cpu = target;

	/*
	 * If this is the last CPU in the package, remove the package
	 * reference from the array and restore the interrupt MSR. When we
	 * drop the lock neither the interrupt notify function nor the
	 * worker will see the package anymore.
	 */
	if (lastcpu) {
		zones[topology_logical_die_id(cpu)] = NULL;
		/* After this point nothing touches the MSR anymore. */
		wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
		      zonedev->msr_pkg_therm_low, zonedev->msr_pkg_therm_high);
	}

	/*
	 * Check whether there is work scheduled and whether the work is
	 * targeted at the outgoing CPU.
	 */
	if (zonedev->work_scheduled && was_target) {
		/*
		 * To cancel the work we need to drop the lock, otherwise
		 * we might deadlock if the work needs to be flushed.
		 */
		raw_spin_unlock_irq(&pkg_temp_lock);
		cancel_delayed_work_sync(&zonedev->work);
		raw_spin_lock_irq(&pkg_temp_lock);
		/*
		 * If this is not the last cpu in the package and the work
		 * did not run after we dropped the lock above, then we
		 * need to reschedule the work, otherwise the interrupt
		 * stays disabled forever.
		 */
		if (!lastcpu && zonedev->work_scheduled)
			pkg_thermal_schedule_work(target, &zonedev->work);
	}

	raw_spin_unlock_irq(&pkg_temp_lock);

	/* Final cleanup if this is the last cpu */
	if (lastcpu)
		kfree(zonedev);

	return 0;
}

static int pkg_thermal_cpu_online(unsigned int cpu)
{
	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	/* Paranoia check */
	if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS))
		return -ENODEV;

	/* If the package exists, nothing to do */
	if (zonedev) {
		cpumask_set_cpu(cpu, &zonedev->cpumask);
		return 0;
	}
	return pkg_temp_thermal_device_add(cpu);
}

static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_PTS, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);

static int __init pkg_temp_thermal_init(void)
{
	int ret;

	if (!x86_match_cpu(pkg_temp_thermal_ids))
		return -ENODEV;

	max_id = topology_max_packages() * topology_max_dies_per_package();
	zones = kcalloc(max_id, sizeof(struct zone_device *),
			GFP_KERNEL);
	if (!zones)
		return -ENOMEM;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
				pkg_thermal_cpu_online, pkg_thermal_cpu_offline);
	if (ret < 0)
		goto err;

	/* Store the state for module exit */
	pkg_thermal_hp_state = ret;

	platform_thermal_package_notify = pkg_thermal_notify;
	platform_thermal_package_rate_control = pkg_thermal_rate_control;

	/* Don't care if it fails */
	pkg_temp_debugfs_init();
	return 0;

err:
	kfree(zones);
	return ret;
}
module_init(pkg_temp_thermal_init)

static void __exit pkg_temp_thermal_exit(void)
{
	platform_thermal_package_notify = NULL;
	platform_thermal_package_rate_control = NULL;

	cpuhp_remove_state(pkg_thermal_hp_state);
	debugfs_remove_recursive(debugfs);
	kfree(zones);
}
module_exit(pkg_temp_thermal_exit)

MODULE_IMPORT_NS("INTEL_TCC");
MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
MODULE_LICENSE("GPL v2");