// SPDX-License-Identifier: GPL-2.0-only
/*
 * menu.c - the menu idle governor
 *
 * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
 * Copyright (C) 2009 Intel Corporation
 * Author:
 *        Arjan van de Ven <arjan@linux.intel.com>
 */

#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/time.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/sched/stat.h>
#include <linux/math64.h>

#include "gov.h"

#define BUCKETS 6
#define INTERVAL_SHIFT 3
#define INTERVALS (1UL << INTERVAL_SHIFT)
#define RESOLUTION 1024
#define DECAY 8
#define MAX_INTERESTING (50000 * NSEC_PER_USEC)
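
/*
 * Note (illustrative): the per-bucket correction factor below is kept as a
 * fixed-point fraction with RESOLUTION * DECAY (8192) representing unity,
 * so e.g. a stored value of 4096 means "the observed idle time is typically
 * about half of the next-timer distance".
 */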

/*
 * Concepts and ideas behind the menu governor
 *
 * For the menu governor, there are 2 decision factors for picking a C
 * state:
 * 1) Energy break even point
 * 2) Latency tolerance (from PM QoS infrastructure)
 * These two factors are treated independently.
 *
 * Energy break even point
 * -----------------------
 * C state entry and exit have an energy cost, and a certain amount of time in
 * the C state is required to actually break even on this cost. CPUIDLE
 * provides us this duration in the "target_residency" field. So all that we
 * need is a good prediction of how long we'll be idle. Like the traditional
 * menu governor, we take the actual known "next timer event" time.
 *
 * Since there are other sources of wakeups (interrupts for example) than
 * the next timer event, this estimation is rather optimistic. To get a
 * more realistic estimate, a correction factor is applied to the estimate,
 * that is based on historic behavior. For example, if in the past the actual
 * duration always was 50% of the next timer tick, the correction factor will
 * be 0.5.
 *
 * menu uses a running average for this correction factor, but it uses a set of
 * factors, not just a single factor. This stems from the realization that the
 * ratio is dependent on the order of magnitude of the expected duration; if we
 * expect 500 milliseconds of idle time the likelihood of getting an interrupt
 * very early is much higher than if we expect 50 microseconds of idle time.
 * For this reason, menu keeps an array of 6 independent factors, that gets
 * indexed based on the magnitude of the expected duration.
 *
 * Repeatable-interval-detector
 * ----------------------------
 * There are some cases where "next timer" is a completely unusable predictor:
 * Those cases where the interval is fixed, for example due to hardware
 * interrupt mitigation, but also due to fixed transfer rate devices like mice.
 * For this, we use a different predictor: We track the duration of the last 8
 * intervals and use them to estimate the duration of the next one.
 */
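
/*
 * Worked example (illustrative, not taken from the code below): if the next
 * timer is 1 ms away and the correction factor for the matching bucket has
 * settled around 0.5, the timer-based estimate becomes ~500 us; if the last
 * 8 observed intervals cluster tightly around 300 us, the repeating-pattern
 * detector wins and ~300 us is used to pick the idle state, since the
 * smaller of the two estimates is used.
 */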

struct menu_device {
	int		needs_update;
	int		tick_wakeup;

	u64		next_timer_ns;
	unsigned int	bucket;
	unsigned int	correction_factor[BUCKETS];
	unsigned int	intervals[INTERVALS];
	int		interval_ptr;
};

static inline int which_bucket(u64 duration_ns)
{
	int bucket = 0;

	if (duration_ns < 10ULL * NSEC_PER_USEC)
		return bucket;
	if (duration_ns < 100ULL * NSEC_PER_USEC)
		return bucket + 1;
	if (duration_ns < 1000ULL * NSEC_PER_USEC)
		return bucket + 2;
	if (duration_ns < 10000ULL * NSEC_PER_USEC)
		return bucket + 3;
	if (duration_ns < 100000ULL * NSEC_PER_USEC)
		return bucket + 4;
	return bucket + 5;
}

static DEFINE_PER_CPU(struct menu_device, menu_devices);

static void menu_update_intervals(struct menu_device *data, unsigned int interval_us)
{
	/* Update the repeating-pattern data. */
	data->intervals[data->interval_ptr++] = interval_us;
	if (data->interval_ptr >= INTERVALS)
		data->interval_ptr = 0;
}

static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);

/*
 * Try detecting repeating patterns by keeping track of the last 8
 * intervals, and checking if the standard deviation of that set
 * of points is below a threshold. If it is... then use the
 * average of these 8 points as the estimated value.
 */
static unsigned int get_typical_interval(struct menu_device *data)
{
	s64 value, min_thresh = -1, max_thresh = UINT_MAX;
	unsigned int max, min, divisor;
	u64 avg, variance, avg_sq;
	int i;

again:
	/* Compute the average and variance of past intervals. */
	max = 0;
	min = UINT_MAX;
	avg = 0;
	variance = 0;
	divisor = 0;
	for (i = 0; i < INTERVALS; i++) {
		value = data->intervals[i];
		/*
		 * Discard the samples outside the interval between the min and
		 * max thresholds.
		 */
		if (value <= min_thresh || value >= max_thresh)
			continue;

		divisor++;

		avg += value;
		variance += value * value;

		if (value > max)
			max = value;

		if (value < min)
			min = value;
	}

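	/*
	 * If no sample was accepted (or every accepted sample was zero),
	 * there is nothing to base a prediction on.
	 */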
	if (!max)
		return UINT_MAX;

	if (divisor == INTERVALS) {
		avg >>= INTERVAL_SHIFT;
		variance >>= INTERVAL_SHIFT;
	} else {
		do_div(avg, divisor);
		do_div(variance, divisor);
	}

	avg_sq = avg * avg;
	variance -= avg_sq;

	/*
	 * The typical interval is obtained when the standard deviation is
	 * small (stddev <= 20 us, variance <= 400 us^2) or the standard
	 * deviation is small compared to the average interval (avg >
	 * 6*stddev, avg^2 > 36*variance). The average is smaller than
	 * UINT_MAX aka U32_MAX, so computing its square does not
	 * overflow a u64. We simply reject this candidate average if
	 * the standard deviation is greater than 715 s (which is
	 * rather unlikely).
	 *
	 * Use this result only if there is no timer to wake us up sooner.
	 */
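	/*
	 * Example (illustrative): with an average of 1000 us over the
	 * remaining samples, the estimate is accepted if stddev <= 20 us
	 * (variance <= 400) or, as long as at least 6 of the 8 samples are
	 * still in play, if stddev is below about 1000/6 ~= 167 us.
	 */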
	if (likely(variance <= U64_MAX/36)) {
		if ((avg_sq > variance * 36 && divisor * 4 >= INTERVALS * 3) ||
		    variance <= 400)
			return avg;
	}

	/*
	 * If there are outliers, discard them by setting thresholds to exclude
	 * data points at a large enough distance from the average, then
	 * calculate the average and standard deviation again. Once we get
	 * down to the last 3/4 of our samples, stop excluding samples.
	 *
	 * This can deal with workloads that have long pauses interspersed
	 * with sporadic activity with a bunch of short pauses.
	 */
	if (divisor * 4 <= INTERVALS * 3) {
		/*
		 * If there are sufficiently many data points still under
		 * consideration after the outliers have been eliminated,
		 * returning without a prediction would be a mistake because it
		 * is likely that the next interval will not exceed the current
		 * maximum, so return the latter in that case.
		 */
		if (divisor >= INTERVALS / 2)
			return max;

		return UINT_MAX;
	}

	/* Update the thresholds for the next round. */
	if (avg - min > max - avg)
		min_thresh = min;
	else
		max_thresh = max;

	goto again;
}

/**
 * menu_select - selects the next idle state to enter
 * @drv: cpuidle driver containing state data
 * @dev: the CPU
 * @stop_tick: indication on whether or not to stop the tick
 */
static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
		       bool *stop_tick)
{
	struct menu_device *data = this_cpu_ptr(&menu_devices);
	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
	u64 predicted_ns;
	ktime_t delta, delta_tick;
	int i, idx;

	if (data->needs_update) {
		menu_update(drv, dev);
		data->needs_update = 0;
	} else if (!dev->last_residency_ns) {
		/*
		 * This happens when the driver rejects the previously selected
		 * idle state and returns an error, so update the recent
		 * intervals table to prevent invalid information from being
		 * used going forward.
		 */
		menu_update_intervals(data, UINT_MAX);
	}

	/* Find the shortest expected idle interval. */
	predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
	if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
		unsigned int timer_us;

		/* Determine the time till the closest timer. */
		delta = tick_nohz_get_sleep_length(&delta_tick);
		if (unlikely(delta < 0)) {
			delta = 0;
			delta_tick = 0;
		}

		data->next_timer_ns = delta;
		data->bucket = which_bucket(data->next_timer_ns);

		/* Round up the result for half microseconds. */
		timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
					data->next_timer_ns *
						data->correction_factor[data->bucket],
				   RESOLUTION * DECAY * NSEC_PER_USEC);
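		/*
		 * Illustrative example: with next_timer_ns = 1 ms and a
		 * correction factor of 4096 (half of the 8192 "unity" value),
		 * this yields timer_us of about 500, i.e. the timer-based
		 * estimate is scaled down to roughly 500 us.
		 */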
		/* Use the lowest expected idle interval to pick the idle state. */
		predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
	} else {
		/*
		 * Because the next timer event is not going to be determined
		 * in this case, assume that without the tick the closest timer
		 * will be in distant future and that the closest tick will occur
		 * after 1/2 of the tick period.
		 */
		data->next_timer_ns = KTIME_MAX;
		delta_tick = TICK_NSEC / 2;
		data->bucket = BUCKETS - 1;
	}

	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
	    ((data->next_timer_ns < drv->states[1].target_residency_ns ||
	      latency_req < drv->states[1].exit_latency_ns) &&
	     !dev->states_usage[0].disable)) {
		/*
		 * In this case state[0] will be used no matter what, so return
		 * it right away and keep the tick running if state[0] is a
		 * polling one.
		 */
		*stop_tick = !(drv->states[0].flags & CPUIDLE_FLAG_POLLING);
		return 0;
	}

	/*
	 * If the tick is already stopped, the cost of a possible short idle
	 * duration misprediction is much higher, because the CPU may be stuck
	 * in a shallow idle state for a long time as a result of it.  In that
	 * case, assume a misprediction and use the known time till the closest
	 * timer event for the idle state selection.
	 */
	if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC)
		predicted_ns = data->next_timer_ns;

	/*
	 * Find the idle state with the lowest power while satisfying
	 * our constraints.
	 */
	idx = -1;
	for (i = 0; i < drv->state_count; i++) {
		struct cpuidle_state *s = &drv->states[i];

		if (dev->states_usage[i].disable)
			continue;

		if (idx == -1)
			idx = i; /* first enabled state */

		if (s->exit_latency_ns > latency_req)
			break;

		if (s->target_residency_ns <= predicted_ns) {
			idx = i;
			continue;
		}

		/*
		 * Use a physical idle state, not busy polling, unless a timer
		 * is going to trigger soon enough.
		 */
		if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
		    s->target_residency_ns <= data->next_timer_ns) {
			predicted_ns = s->target_residency_ns;
			idx = i;
			break;
		}

		if (predicted_ns < TICK_NSEC)
			break;

		if (!tick_nohz_tick_stopped()) {
			/*
			 * If the state selected so far is shallow, waking up
			 * early won't hurt, so retain the tick in that case and
			 * let the governor run again in the next iteration of
			 * the idle loop.
			 */
			predicted_ns = drv->states[idx].target_residency_ns;
			break;
		}

		/*
		 * If the state selected so far is shallow and this state's
		 * target residency matches the time till the closest timer
		 * event, select this one to avoid getting stuck in the shallow
		 * one for too long.
		 */
		if (drv->states[idx].target_residency_ns < TICK_NSEC &&
		    s->target_residency_ns <= delta_tick)
			idx = i;

		return idx;
	}

	if (idx == -1)
		idx = 0; /* No states enabled. Must use 0. */

	/*
	 * Don't stop the tick if the selected state is a polling one or if the
	 * expected idle duration is shorter than the tick period length.
	 */
	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
	     predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
		*stop_tick = false;

		if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick) {
			/*
			 * The tick is not going to be stopped and the target
			 * residency of the state to be returned is not within
			 * the time until the next timer event including the
			 * tick, so try to correct that.
			 */
			for (i = idx - 1; i >= 0; i--) {
				if (dev->states_usage[i].disable)
					continue;

				idx = i;
				if (drv->states[i].target_residency_ns <= delta_tick)
					break;
			}
		}
	}

	return idx;
}

/**
 * menu_reflect - records that data structures need update
 * @dev: the CPU
 * @index: the index of actual entered state
 *
 * NOTE: it's important to be fast here because this operation will add to
 *       the overall exit latency.
 */
static void menu_reflect(struct cpuidle_device *dev, int index)
{
	struct menu_device *data = this_cpu_ptr(&menu_devices);

	dev->last_state_idx = index;
	data->needs_update = 1;
	data->tick_wakeup = tick_nohz_idle_got_tick();
}

/**
 * menu_update - attempts to guess what happened after entry
 * @drv: cpuidle driver containing state data
 * @dev: the CPU
 */
static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
	struct menu_device *data = this_cpu_ptr(&menu_devices);
	int last_idx = dev->last_state_idx;
	struct cpuidle_state *target = &drv->states[last_idx];
	u64 measured_ns;
	unsigned int new_factor;

	/*
	 * Try to figure out how much time passed between entry to low
	 * power state and occurrence of the wakeup event.
	 *
	 * If the entered idle state didn't support residency measurements,
	 * we use them anyway if they are short, and if long,
	 * truncate to the whole expected time.
	 *
	 * Any measured amount of time will include the exit latency.
	 * Since we are interested in when the wakeup began, not when it
	 * was completed, we must subtract the exit latency. However, if
	 * the measured amount of time is less than the exit latency,
	 * assume the state was never reached and the exit latency is 0.
	 */

	if (data->tick_wakeup && data->next_timer_ns > TICK_NSEC) {
		/*
		 * The nohz code said that there wouldn't be any events within
		 * the tick boundary (if the tick was stopped), but the idle
		 * duration predictor had a differing opinion.  Since the CPU
		 * was woken up by a tick (that wasn't stopped after all), the
		 * predictor was not quite right, so assume that the CPU could
		 * have been idle long (but not forever) to help the idle
		 * duration predictor do a better job next time.
		 */
		measured_ns = 9 * MAX_INTERESTING / 10;
	} else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) &&
		   dev->poll_time_limit) {
		/*
		 * The CPU exited the "polling" state due to a time limit, so
		 * the idle duration prediction leading to the selection of that
		 * state was inaccurate.  If a better prediction had been made,
		 * the CPU might have been woken up from idle by the next timer.
		 * Assume that to be the case.
		 */
		measured_ns = data->next_timer_ns;
	} else {
		/* measured value */
		measured_ns = dev->last_residency_ns;

		/* Deduct the exit latency. */
		if (measured_ns > 2 * target->exit_latency_ns)
			measured_ns -= target->exit_latency_ns;
		else
			measured_ns /= 2;
	}

	/* Make sure our coefficients do not exceed unity */
	if (measured_ns > data->next_timer_ns)
		measured_ns = data->next_timer_ns;

	/* Update our correction ratio */
	new_factor = data->correction_factor[data->bucket];
	new_factor -= new_factor / DECAY;

	if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING)
		new_factor += div64_u64(RESOLUTION * measured_ns,
					data->next_timer_ns);
	else
		/*
		 * we were idle so long that we count it as a perfect
		 * prediction
		 */
		new_factor += RESOLUTION;
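
	/*
	 * Illustrative note: this is a decaying average with weight 1/DECAY.
	 * 7/8 of the previous factor is kept and the new measured/next_timer
	 * ratio (scaled by RESOLUTION) is added, so with a steady ratio the
	 * factor converges to RESOLUTION * DECAY times that ratio.
	 */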

	/*
	 * We don't want 0 as factor; we always want at least
	 * a tiny bit of estimated time. Fortunately, due to rounding,
	 * new_factor will stay nonzero regardless of measured_us values
	 * and the compiler can eliminate this test as long as DECAY > 1.
	 */
	if (DECAY == 1 && unlikely(new_factor == 0))
		new_factor = 1;

	data->correction_factor[data->bucket] = new_factor;

	menu_update_intervals(data, ktime_to_us(measured_ns));
}

/**
 * menu_enable_device - scans a CPU's states and does setup
 * @drv: cpuidle driver
 * @dev: the CPU
 */
static int menu_enable_device(struct cpuidle_driver *drv,
			      struct cpuidle_device *dev)
{
	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
	int i;

	memset(data, 0, sizeof(struct menu_device));

	/*
	 * If the correction factor is 0 (e.g. first time init or cpu hotplug
	 * etc), we actually want to start out with a unity factor.
	 */
	for (i = 0; i < BUCKETS; i++)
		data->correction_factor[i] = RESOLUTION * DECAY;

	return 0;
}

static struct cpuidle_governor menu_governor = {
	.name =		"menu",
	.rating =	20,
	.enable =	menu_enable_device,
	.select =	menu_select,
	.reflect =	menu_reflect,
};

/**
 * init_menu - initializes the governor
 */
static int __init init_menu(void)
{
	return cpuidle_register_governor(&menu_governor);
}

postcore_initcall(init_menu);