// SPDX-License-Identifier: GPL-2.0
/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */
#include <uapi/linux/sched/types.h>
#include "sched.h"

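/*
 * Minimum IO-wait boost, as a fraction of SCHED_CAPACITY_SCALE (1024 by
 * default), so the boost starts at 1024 / 8 = 128 and doubles from there.
 */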
#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)

struct sugov_tunables {
	struct gov_attr_set	attr_set;
	unsigned int		rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy	*policy;

	struct sugov_tunables	*tunables;
	struct list_head	tunables_hook;

	raw_spinlock_t		update_lock;
	u64			last_freq_update_time;
	s64			freq_update_delay_ns;
	unsigned int		next_freq;
	unsigned int		cached_raw_freq;

	/* The next fields are only needed if fast switch cannot be used: */
	struct			irq_work irq_work;
	struct			kthread_work work;
	struct			mutex work_lock;
	struct			kthread_worker worker;
	struct task_struct	*thread;
	bool			work_in_progress;

	bool			limits_changed;
	bool			need_freq_update;
};

struct sugov_cpu {
	struct update_util_data	update_util;
	struct sugov_policy	*sg_policy;
	unsigned int		cpu;

	bool			iowait_boost_pending;
	unsigned int		iowait_boost;
	u64			last_update;

	unsigned long		util;
	unsigned long		bw_min;

	/* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON
	unsigned long		saved_idle_calls;
#endif
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	/*
	 * Since cpufreq_update_util() is called with rq->lock held for
	 * the @target_cpu, our per-CPU data is fully serialized.
	 *
	 * However, drivers cannot in general deal with cross-CPU
	 * requests, so while get_next_freq() will work, our
	 * sugov_update_commit() call may not for the fast switching platforms.
	 *
	 * Hence stop here for remote requests if they aren't supported
	 * by the hardware, as calculating the frequency is pointless if
	 * we cannot in fact act on it.
	 *
	 * This is needed on the slow switching platforms too to prevent CPUs
	 * going offline from leaving stale IRQ work items behind.
	 */
	if (!cpufreq_this_cpu_can_update(sg_policy->policy))
		return false;

	if (unlikely(READ_ONCE(sg_policy->limits_changed))) {
		WRITE_ONCE(sg_policy->limits_changed, false);
		sg_policy->need_freq_update = true;

		/*
		 * The above limits_changed update must occur before the reads
		 * of policy limits in cpufreq_driver_resolve_freq() or a policy
		 * limits update might be missed, so use a memory barrier to
		 * ensure it.
		 *
		 * This pairs with the write memory barrier in sugov_limits().
		 */
		smp_mb();

		return true;
	} else if (sg_policy->need_freq_update) {
		/* ignore_dl_rate_limit() wants a new frequency to be found. */
		return true;
	}

	delta_ns = time - sg_policy->last_freq_update_time;

	return delta_ns >= sg_policy->freq_update_delay_ns;
}
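
/*
 * For example, with rate_limit_us set to 2000, freq_update_delay_ns is
 * 2,000,000, so utilization updates arriving within 2 ms of the last
 * frequency change are ignored unless the policy limits have changed or
 * ignore_dl_rate_limit() has requested a fresh evaluation.
 */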

static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
				   unsigned int next_freq)
{
	if (sg_policy->need_freq_update) {
		sg_policy->need_freq_update = false;
		/*
		 * The policy limits have changed, but if the return value of
		 * cpufreq_driver_resolve_freq() after applying the new limits
		 * is still equal to the previously selected frequency, the
		 * driver callback need not be invoked unless the driver
		 * specifically wants that to happen on every update of the
		 * policy limits.
		 */
		if (sg_policy->next_freq == next_freq &&
		    !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS))
			return false;
	} else if (sg_policy->next_freq == next_freq) {
		return false;
	}

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;

	return true;
}

static void sugov_deferred_update(struct sugov_policy *sg_policy)
{
	if (!sg_policy->work_in_progress) {
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}

/**
 * get_capacity_ref_freq - get the reference frequency that has been used to
 * correlate frequency and compute capacity for a given cpufreq policy. We use
 * the CPU managing it for the arch_scale_freq_ref() call in the function.
 * @policy: the cpufreq policy of the CPU in question.
 *
 * Return: the reference CPU frequency to compute a capacity.
 */
static __always_inline
unsigned long get_capacity_ref_freq(struct cpufreq_policy *policy)
{
	unsigned int freq = arch_scale_freq_ref(policy->cpu);

	if (freq)
		return freq;

	if (arch_scale_freq_invariant())
		return policy->cpuinfo.max_freq;

	/*
	 * Apply a 25% margin so that we select a higher frequency than
	 * the current one before the CPU is fully busy:
	 */
	return policy->cur + (policy->cur >> 2);
}
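
/*
 * For example, without frequency invariance and with policy->cur at
 * 1600000 kHz, the fallback above returns 1600000 + 400000 = 2000000 kHz
 * as the reference frequency, i.e. 25% above the current frequency.
 */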

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_policy: schedutil policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 *
 * The lowest driver-supported frequency which is equal to or greater than the
 * raw next_freq (as calculated above) is returned, subject to policy min/max
 * and cpufreq driver limitations.
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq;

	freq = get_capacity_ref_freq(policy);
	freq = map_util_freq(util, freq, max);

	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
		return sg_policy->next_freq;

	sg_policy->cached_raw_freq = freq;
	return cpufreq_driver_resolve_freq(policy, freq);
}
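
/*
 * Worked example of the formula above: with C = 1.25, a reference frequency
 * of 2000000 kHz and util / max = 0.6, the raw next_freq is
 * 1.25 * 2000000 * 0.6 = 1500000 kHz; cpufreq_driver_resolve_freq() then
 * picks the lowest driver-supported frequency at or above that value.
 */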

unsigned long sugov_effective_cpu_perf(int cpu, unsigned long actual,
				       unsigned long min,
				       unsigned long max)
{
	/* Add dvfs headroom to actual utilization */
	actual = map_util_perf(actual);
	/* Actually we don't need to target the max performance */
	if (actual < max)
		max = actual;

	/*
	 * Ensure at least minimum performance while providing more compute
	 * capacity when possible.
	 */
	return max(min, max);
}
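
/*
 * For instance, with actual = 600, min = 200 and max = 1024:
 * map_util_perf() adds the 25% DVFS headroom (util + util / 4), giving
 * 600 + 150 = 750; that is below max, so the target becomes 750, and since
 * it exceeds min the function returns 750. With a small actual (say 100),
 * the headroom result of 125 would be overridden by min = 200.
 */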

static void sugov_get_util(struct sugov_cpu *sg_cpu, unsigned long boost)
{
	unsigned long min, max, util = scx_cpuperf_target(sg_cpu->cpu);

	if (!scx_switched_all())
		util += cpu_util_cfs_boost(sg_cpu->cpu);
	util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
	util = max(util, boost);
	sg_cpu->bw_min = min;
	sg_cpu->util = sugov_effective_cpu_perf(sg_cpu->cpu, util, min, max);
}

/**
 * sugov_iowait_reset() - Reset the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @set_iowait_boost: true if an IO boost has been requested
 *
 * The IO wait boost of a task is disabled after a tick since the last update
 * of a CPU. If a new IO wait boost is requested after more than a tick, then
 * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
 * efficiency by ignoring sporadic wakeups from IO.
 */
static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
			       bool set_iowait_boost)
{
	s64 delta_ns = time - sg_cpu->last_update;

	/* Reset boost only if a tick has elapsed since last request */
	if (delta_ns <= TICK_NSEC)
		return false;

	sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
	sg_cpu->iowait_boost_pending = set_iowait_boost;

	return true;
}

/**
 * sugov_iowait_boost() - Updates the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
 *
 * Each time a task wakes up after an IO operation, the CPU utilization can be
 * boosted to a certain utilization which doubles at each "frequent and
 * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
 * of the maximum OPP.
 *
 * To keep doubling, an IO boost has to be requested at least once per tick,
 * otherwise we restart from the utilization of the minimum OPP.
 */
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
			       unsigned int flags)
{
	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sg_cpu->iowait_boost &&
	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
		return;

	/* Boost only tasks waking up after IO */
	if (!set_iowait_boost)
		return;

	/* Ensure boost doubles only one time at each request */
	if (sg_cpu->iowait_boost_pending)
		return;
	sg_cpu->iowait_boost_pending = true;

	/* Double the boost at each request */
	if (sg_cpu->iowait_boost) {
		sg_cpu->iowait_boost =
			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
		return;
	}

	/* First wakeup after IO: start with minimum boost */
	sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
}
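
/*
 * With the default SCHED_CAPACITY_SCALE of 1024, successive IO wakeups that
 * arrive at least once per tick ramp the boost through
 * 128 -> 256 -> 512 -> 1024, i.e. from IOWAIT_BOOST_MIN up to the full
 * capacity scale in four steps.
 */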

/**
 * sugov_iowait_apply() - Apply the IO boost to a CPU.
 * @sg_cpu: the sugov data for the cpu to boost
 * @time: the update time from the caller
 * @max_cap: the max CPU capacity
 *
 * A CPU running a task which has woken up after an IO operation can have its
 * utilization boosted to speed up the completion of those IO operations.
 * The IO boost value is increased each time a task wakes up from IO, in
 * sugov_iowait_boost(), and it is decreased by this function each time an
 * increase has not been requested (!iowait_boost_pending).
 *
 * A CPU which appears to have been idle for at least one tick also has its
 * IO boost utilization reset.
 *
 * This mechanism is designed to boost tasks that frequently wait on IO, while
 * being more conservative on tasks that do only sporadic IO operations.
 */
static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
					unsigned long max_cap)
{
	/* No boost currently required */
	if (!sg_cpu->iowait_boost)
		return 0;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sugov_iowait_reset(sg_cpu, time, false))
		return 0;

	if (!sg_cpu->iowait_boost_pending) {
		/*
		 * No boost pending; reduce the boost value.
		 */
		sg_cpu->iowait_boost >>= 1;
		if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
			sg_cpu->iowait_boost = 0;
			return 0;
		}
	}

	sg_cpu->iowait_boost_pending = false;

	/*
	 * sg_cpu->util is already in capacity scale; convert iowait_boost
	 * into the same scale so we can compare.
	 */
	return (sg_cpu->iowait_boost * max_cap) >> SCHED_CAPACITY_SHIFT;
}
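
/*
 * Scaling example: with iowait_boost = 512 and max_cap = 1024 the returned
 * boost is (512 * 1024) >> 10 = 512, while on a CPU with max_cap = 446 the
 * same boost level translates to (512 * 446) >> 10 = 223 in that CPU's
 * capacity scale.
 */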

#ifdef CONFIG_NO_HZ_COMMON
static bool sugov_hold_freq(struct sugov_cpu *sg_cpu)
{
	unsigned long idle_calls;
	bool ret;

	/*
	 * The heuristic in this function is for the fair class. For SCX, the
	 * performance target comes directly from the BPF scheduler. Let's just
	 * follow it.
	 */
	if (scx_switched_all())
		return false;

	/* if capped by uclamp_max, always update to be in compliance */
	if (uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)))
		return false;

	/*
	 * Maintain the frequency if the CPU has not been idle recently, as
	 * reduction is likely to be premature.
	 */
	idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
	ret = idle_calls == sg_cpu->saved_idle_calls;

	sg_cpu->saved_idle_calls = idle_calls;
	return ret;
}
#else /* !CONFIG_NO_HZ_COMMON: */
static inline bool sugov_hold_freq(struct sugov_cpu *sg_cpu) { return false; }
#endif /* !CONFIG_NO_HZ_COMMON */

/*
 * Make sugov_should_update_freq() ignore the rate limit when DL
 * has increased the utilization.
 */
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{
	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_min)
		sg_cpu->sg_policy->need_freq_update = true;
}

static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
					      u64 time, unsigned long max_cap,
					      unsigned int flags)
{
	unsigned long boost;

	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu);

	if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
		return false;

	boost = sugov_iowait_apply(sg_cpu, time, max_cap);
	sugov_get_util(sg_cpu, boost);

	return true;
}

static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int cached_freq = sg_policy->cached_raw_freq;
	unsigned long max_cap;
	unsigned int next_f;

	max_cap = arch_scale_cpu_capacity(sg_cpu->cpu);

	if (!sugov_update_single_common(sg_cpu, time, max_cap, flags))
		return;

	next_f = get_next_freq(sg_policy, sg_cpu->util, max_cap);

	if (sugov_hold_freq(sg_cpu) && next_f < sg_policy->next_freq &&
	    !sg_policy->need_freq_update) {
		next_f = sg_policy->next_freq;

		/* Restore cached freq as next_freq has changed */
		sg_policy->cached_raw_freq = cached_freq;
	}

	if (!sugov_update_next_freq(sg_policy, time, next_f))
		return;

	/*
	 * This code runs under rq->lock for the target CPU, so it won't run
	 * concurrently on two different CPUs for the same target and it is not
	 * necessary to acquire the lock in the fast switch case.
	 */
	if (sg_policy->policy->fast_switch_enabled) {
		cpufreq_driver_fast_switch(sg_policy->policy, next_f);
	} else {
		raw_spin_lock(&sg_policy->update_lock);
		sugov_deferred_update(sg_policy);
		raw_spin_unlock(&sg_policy->update_lock);
	}
}

static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	unsigned long prev_util = sg_cpu->util;
	unsigned long max_cap;

	/*
	 * Fall back to the "frequency" path if frequency invariance is not
	 * supported, because the direct mapping between the utilization and
	 * the performance levels depends on the frequency invariance.
	 */
	if (!arch_scale_freq_invariant()) {
		sugov_update_single_freq(hook, time, flags);
		return;
	}

	max_cap = arch_scale_cpu_capacity(sg_cpu->cpu);

	if (!sugov_update_single_common(sg_cpu, time, max_cap, flags))
		return;

	if (sugov_hold_freq(sg_cpu) && sg_cpu->util < prev_util)
		sg_cpu->util = prev_util;

	cpufreq_driver_adjust_perf(sg_cpu->cpu, sg_cpu->bw_min,
				   sg_cpu->util, max_cap);

	sg_cpu->sg_policy->last_freq_update_time = time;
}
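
/*
 * Unlike the "frequency" path above, this path hands the driver a
 * (min, target, capacity) tuple via cpufreq_driver_adjust_perf() instead of
 * a resolved frequency, letting drivers with a hardware performance
 * interface (intel_pstate with HWP, for instance) choose the operating
 * point themselves.
 */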

static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util = 0, max_cap;
	unsigned int j;

	max_cap = arch_scale_cpu_capacity(sg_cpu->cpu);

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
		unsigned long boost;

		boost = sugov_iowait_apply(j_sg_cpu, time, max_cap);
		sugov_get_util(j_sg_cpu, boost);

		util = max(j_sg_cpu->util, util);
	}

	return get_next_freq(sg_policy, util, max_cap);
}

static void
sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu);

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_cpu, time);

		if (!sugov_update_next_freq(sg_policy, time, next_f))
			goto unlock;

		if (sg_policy->policy->fast_switch_enabled)
			cpufreq_driver_fast_switch(sg_policy->policy, next_f);
		else
			sugov_deferred_update(sg_policy);
	}
unlock:
	raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct kthread_work *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
	unsigned int freq;
	unsigned long flags;

	/*
	 * Hold sg_policy->update_lock briefly to handle the case where
	 * sg_policy->next_freq is read here and then updated by
	 * sugov_deferred_update() just before work_in_progress is set to
	 * false here, in which case we could miss queueing the new update.
	 *
	 * Note: If a work item was queued after the update_lock was released,
	 * sugov_work() will just be called again by the kthread_work code; the
	 * request will be processed before the sugov thread sleeps.
	 */
	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
	freq = sg_policy->next_freq;
	sg_policy->work_in_progress = false;
	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t
rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}
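
/*
 * The tunable is exposed through sysfs; for per-policy tunables the path is
 * typically (value shown is only illustrative):
 *
 *   echo 2000 > /sys/devices/system/cpu/cpufreq/policy0/schedutil/rate_limit_us
 *
 * which sets freq_update_delay_ns to 2 ms for every CPU sharing that policy.
 */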

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attrs[] = {
	&rate_limit_us.attr,
	NULL
};
ATTRIBUTE_GROUPS(sugov);

static void sugov_tunables_free(struct kobject *kobj)
{
	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);

	kfree(to_sugov_tunables(attr_set));
}

static const struct kobj_type sugov_tunables_ktype = {
	.default_groups = sugov_groups,
	.sysfs_ops = &governor_sysfs_ops,
	.release = &sugov_tunables_free,
};

/********************** cpufreq governor interface *********************/

static struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}

static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
	struct task_struct *thread;
	struct sched_attr attr = {
		.size		= sizeof(struct sched_attr),
		.sched_policy	= SCHED_DEADLINE,
		.sched_flags	= SCHED_FLAG_SUGOV,
		.sched_nice	= 0,
		.sched_priority	= 0,
		/*
		 * Fake (unused) bandwidth; workaround to "fix"
		 * priority inheritance.
		 */
		.sched_runtime	= NSEC_PER_MSEC,
		.sched_deadline = 10 * NSEC_PER_MSEC,
		.sched_period	= 10 * NSEC_PER_MSEC,
	};
	struct cpufreq_policy *policy = sg_policy->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	kthread_init_work(&sg_policy->work, sugov_work);
	kthread_init_worker(&sg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
				"sugov:%d",
				cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setattr_nocheck(thread, &attr);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
		return ret;
	}

	sg_policy->thread = thread;
	if (policy->dvfs_possible_from_any_cpu)
		set_cpus_allowed_ptr(thread, policy->related_cpus);
	else
		kthread_bind_mask(thread, policy->related_cpus);

	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	mutex_init(&sg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}

static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
	/* kthread only required for slow path */
	if (sg_policy->policy->fast_switch_enabled)
		return;

	kthread_flush_worker(&sg_policy->worker);
	kthread_stop(sg_policy->thread);
	mutex_destroy(&sg_policy->work_lock);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_clear_global_tunables(void)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;
}

static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto stop_kthread;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

out:
	/*
	 * Schedutil is the preferred governor for EAS, so rebuild sched domains
	 * on governor changes to make sure the scheduler knows about them.
	 */
	em_rebuild_sched_domains();
	mutex_unlock(&global_tunables_lock);
	return 0;

fail:
	kobject_put(&tunables->attr_set.kobj);
	policy->governor_data = NULL;
	sugov_clear_global_tunables();

stop_kthread:
	sugov_kthread_stop(sg_policy);
	mutex_unlock(&global_tunables_lock);

free_sg_policy:
	sugov_policy_free(sg_policy);

disable_fast_switch:
	cpufreq_disable_fast_switch(policy);

	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static void sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_clear_global_tunables();

	mutex_unlock(&global_tunables_lock);

	sugov_kthread_stop(sg_policy);
	sugov_policy_free(sg_policy);
	cpufreq_disable_fast_switch(policy);

	em_rebuild_sched_domains();
}

static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
	unsigned int cpu;

	sg_policy->freq_update_delay_ns	= sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time	= 0;
	sg_policy->next_freq			= 0;
	sg_policy->work_in_progress		= false;
	sg_policy->limits_changed		= false;
	sg_policy->cached_raw_freq		= 0;

	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);

	if (policy_is_shared(policy))
		uu = sugov_update_shared;
	else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
		uu = sugov_update_single_perf;
	else
		uu = sugov_update_single_freq;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		memset(sg_cpu, 0, sizeof(*sg_cpu));
		sg_cpu->cpu = cpu;
		sg_cpu->sg_policy = sg_policy;
		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu);
	}
	return 0;
}

static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_rcu();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&sg_policy->irq_work);
		kthread_cancel_work_sync(&sg_policy->work);
	}
}

static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	/*
	 * The limits_changed update below must take place before the updates
	 * of policy limits in cpufreq_set_policy() or a policy limits update
	 * might be missed, so use a memory barrier to ensure it.
	 *
	 * This pairs with the memory barrier in sugov_should_update_freq().
	 */
	smp_wmb();

	WRITE_ONCE(sg_policy->limits_changed, true);
}

static struct cpufreq_governor schedutil_gov = {
	.name			= "schedutil",
	.owner			= THIS_MODULE,
	.flags			= CPUFREQ_GOV_DYNAMIC_SWITCHING,
	.init			= sugov_init,
	.exit			= sugov_exit,
	.start			= sugov_start,
	.stop			= sugov_stop,
	.limits			= sugov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif

bool sugov_is_governor(struct cpufreq_policy *policy)
{
	return policy->governor == &schedutil_gov;
}

cpufreq_governor_init(schedutil_gov);