// SPDX-License-Identifier: GPL-2.0
/*
 *	Precise Delay Loops for i386
 *
 *	Copyright (C) 1993 Linus Torvalds
 *	Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
 *	Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
 *
 *	The __delay function must _NOT_ be inlined as its execution time
 *	depends wildly on alignment on many x86 processors. The additional
 *	jump magic is needed to get the timing stable on all the CPU's
 *	we have to worry about.
 */

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>

#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#include <asm/mwait.h>

#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif

static void delay_loop(u64 __loops);

/*
 * Calibration and selection of the delay mechanism happens only once
 * during boot.
 */
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;

/* simple loop based delay: */
static void delay_loop(u64 __loops)
{
	unsigned long loops = (unsigned long)__loops;

	asm volatile(
		"	test %0,%0	\n"
		"	jz 3f		\n"
		"	jmp 1f		\n"

		".align 16		\n"
		"1:	jmp 2f		\n"

		".align 16		\n"
		"2:	dec %0		\n"
		"	jnz 2b		\n"
		"3:	dec %0		\n"

		: "+a"(loops)
		:
	);
}
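
/*
 * Roughly, the asm above behaves like the C sketch below (illustrative
 * only; the real loop stays in asm, and is not inlined, so that the
 * .align padding and extra jumps keep its timing stable across CPU
 * models, as explained in the header comment):
 *
 *	if (loops) {
 *		do {
 *			loops--;
 *		} while (loops);
 *	}
 *	loops--;	(final decrement; the value is discarded)
 */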

/* TSC based delay: */
static void delay_tsc(u64 cycles)
{
	u64 bclock, now;
	int cpu;

	preempt_disable();
	cpu = smp_processor_id();
	bclock = rdtsc_ordered();
	for (;;) {
		now = rdtsc_ordered();
		if ((now - bclock) >= cycles)
			break;

		/* Allow RT tasks to run */
		preempt_enable();
		native_pause();
		preempt_disable();

		/*
		 * It is possible that we moved to another CPU, and
		 * since TSC's are per-cpu we need to calculate
		 * that. The delay must guarantee that we wait "at
		 * least" the amount of time. Being moved to another
		 * CPU could make the wait longer but we just need to
		 * make sure we waited long enough. Rebalance the
		 * counter for this CPU.
		 */
		if (unlikely(cpu != smp_processor_id())) {
			cycles -= (now - bclock);
			cpu = smp_processor_id();
			bclock = rdtsc_ordered();
		}
	}
	preempt_enable();
}
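
/*
 * Note on units: once delay_tsc() is installed as delay_fn, the "cycles"
 * argument is a count of TSC ticks and loops_per_jiffy ends up calibrated
 * in the same unit (see read_current_timer() below). Purely illustrative
 * example: a 10 us delay on a CPU whose TSC runs at 3 GHz comes out to
 * roughly
 *
 *	10 * 3,000,000,000 / 1,000,000 = 30,000 TSC ticks
 */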

/*
 * On Intel the TPAUSE instruction waits until any of:
 * 1) the TSC counter exceeds the value provided in EDX:EAX
 * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
 * 3) an external interrupt occurs
 */
static void delay_halt_tpause(u64 start, u64 cycles)
{
	u64 until = start + cycles;
	u32 eax, edx;

	eax = lower_32_bits(until);
	edx = upper_32_bits(until);

	/*
	 * Hard code the deeper (C0.2) sleep state because exit latency is
	 * small compared to the "microseconds" that usleep() will delay.
	 */
	__tpause(TPAUSE_C02_STATE, edx, eax);
}
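
/*
 * Caller's view (see delay_halt() below): TPAUSE takes an absolute TSC
 * deadline in EDX:EAX, which is why this helper is handed the TSC value
 * sampled at the start of the delay and not just a cycle count. Sketch
 * of the expected call sequence:
 *
 *	start = rdtsc_ordered();
 *	delay_halt_tpause(start, cycles);	(waits until TSC >= start + cycles)
 */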

/*
 * On some AMD platforms, MWAITX has a configurable 32-bit timer that
 * counts with TSC frequency. The input value is the number of TSC cycles
 * to wait. MWAITX will also exit when the timer expires.
 */
static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
	u64 delay;

	delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
	/*
	 * Use cpu_tss_rw as a cacheline-aligned, seldom accessed per-cpu
	 * variable as the monitor target.
	 */
	__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);

	/*
	 * AMD, like Intel, supports the EAX hint and EAX=0xf means, do not
	 * enter any deep C-state and we use it here in delay() to minimize
	 * wakeup latency.
	 */
	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
}
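
/*
 * The MWAITX timer is only 32 bits wide, so requests larger than
 * MWAITX_MAX_WAIT_CYCLES are clamped above; the remainder is covered by
 * the retry loop in delay_halt() below, which simply calls this helper
 * again with whatever cycles are still outstanding.
 */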

/*
 * Call a vendor specific function to delay for a given amount of time. Because
 * these functions may return earlier than requested, check for actual elapsed
 * time and call again until done.
 */
static void delay_halt(u64 __cycles)
{
	u64 start, end, cycles = __cycles;

	/*
	 * Timer value of 0 causes MWAITX to wait indefinitely, unless there
	 * is a store on the memory monitored by MONITORX.
	 */
	if (!cycles)
		return;

	start = rdtsc_ordered();

	for (;;) {
		delay_halt_fn(start, cycles);
		end = rdtsc_ordered();

		if (cycles <= end - start)
			break;

		cycles -= end - start;
		start = end;
	}
}
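
/*
 * Worked example with made-up numbers: for a request of 1000 cycles, if
 * the first delay_halt_fn() call returns after only 400 elapsed TSC ticks
 * (an early exit, e.g. due to an interrupt), 600 cycles remain and the
 * helper is invoked again from the new start point. The loop only
 * terminates once at least the full requested time has passed.
 */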

void __init use_tsc_delay(void)
{
	if (delay_fn == delay_loop)
		delay_fn = delay_tsc;
}

void __init use_tpause_delay(void)
{
	delay_halt_fn = delay_halt_tpause;
	delay_fn = delay_halt;
}

void use_mwaitx_delay(void)
{
	delay_halt_fn = delay_halt_mwaitx;
	delay_fn = delay_halt;
}
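
/*
 * These selectors are invoked from CPU/TSC setup code outside this file
 * (e.g. the TSC init path, and AMD CPU init for MWAITX capable parts).
 * Note that use_tsc_delay() deliberately refuses to replace an already
 * installed halt based method, as those avoid a pure busy wait.
 */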

int read_current_timer(unsigned long *timer_val)
{
	if (delay_fn == delay_tsc) {
		*timer_val = rdtsc();
		return 0;
	}
	return -1;
}
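
/*
 * This hooks into the generic boot-time calibration path
 * (calibrate_delay_direct()), which can derive loops_per_jiffy directly
 * from a hardware timer instead of timing delay_loop() iterations. It
 * only succeeds once delay_tsc() is in use, hence the -1 fallback above.
 */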

void __delay(unsigned long loops)
{
	delay_fn(loops);
}
EXPORT_SYMBOL(__delay);

noinline void __const_udelay(unsigned long xloops)
{
	unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
	int d0;

	xloops *= 4;
	asm("mull %%edx"
		: "=d"(xloops), "=&a"(d0)
		: "1"(xloops), "0"(lpj * (HZ / 4)));

	__delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);
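
/*
 * The fixed point math above, spelled out: callers pass xloops already
 * scaled by 2^32 (see __udelay() and __ndelay() below), and "mull" keeps
 * only the upper 32 bits of the product, so effectively
 *
 *	loops = (usecs * 2^32 / 10^6) * lpj * HZ / 2^32
 *	      = usecs * lpj * HZ / 10^6
 *
 * i.e. microseconds times delay-loop units per second, divided by one
 * million. The "xloops *= 4" paired with "HZ / 4" keeps lpj * (HZ / 4)
 * within 32 bits on fast CPUs without changing the product, and the
 * "++xloops" rounds up so the delay never undershoots.
 */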

void __udelay(unsigned long usecs)
{
	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);
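
/*
 * Worked numbers for the scaling constant: 2^32 / 10^6 = 4294.967...,
 * rounded up to 4295 = 0x10c7, so __const_udelay() never delays for less
 * than the requested time. The __ndelay() constant below is the
 * analogous 2^32 / 10^9 = 4.29..., rounded up to 5.
 */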

void __ndelay(unsigned long nsecs)
{
	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);
|---|