gettimeofday.h source code [Linux/arch/x86/include/asm/vdso/gettimeofday.h]

1	/* SPDX-License-Identifier: GPL-2.0 */
2	/*
3	* Fast user context implementation of clock_gettime, gettimeofday, and time.
4	*
5	* Copyright (C) 2019 ARM Limited.
6	* Copyright 2006 Andi Kleen, SUSE Labs.
7	* 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
8	* sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
9	*/
10	#ifndef __ASM_VDSO_GETTIMEOFDAY_H
11	#define __ASM_VDSO_GETTIMEOFDAY_H
12
13	#ifndef __ASSEMBLER__
14
15	#include <uapi/linux/time.h>
16	#include <asm/vgtod.h>
17	#include <asm/unistd.h>
18	#include <asm/msr.h>
19	#include <asm/pvclock.h>
20	#include <clocksource/hyperv_timer.h>
21
22	#define VDSO_HAS_TIME 1
23
24	#define VDSO_HAS_CLOCK_GETRES 1
25
26	/*
27	* Declare the memory-mapped vclock data pages. These come from hypervisors.
28	* If we ever reintroduce something like direct access to an MMIO clock like
29	* the HPET again, it will go here as well.
30	*
31	* A load from any of these pages will segfault if the clock in question is
32	* disabled, so appropriate compiler barriers and checks need to be used
33	* to prevent stray loads.
34	*
35	* These declarations MUST NOT be const. The compiler will assume that
36	* an extern const variable has genuinely constant contents, and the
37	* resulting code won't work, since the whole point is that these pages
38	* change over time, possibly while we're accessing them.
39	*/
40
#ifdef CONFIG_PARAVIRT_CLOCK
/*
 * pvclock page belonging to vCPU 0.  The vDSO reads pvclock data only
 * when the hypervisor reports that every vCPU can obtain valid data from
 * this single page.
 */
extern struct pvclock_vsyscall_time_info pvclock_page __attribute__((visibility("hidden")));
#endif
50
#ifdef CONFIG_HYPERV_TIMER
/*
 * Hyper-V TSC page consumed by vread_hvclock().  Deliberately not
 * declared const, because the contents of the page change over time.
 */
extern struct ms_hyperv_tsc_page hvclock_page __attribute__((visibility("hidden")));
#endif
55
56	#ifndef BUILD_VDSO32
57
58	static __always_inline
59	long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
60	{
61	long ret;
62
63	asm ("syscall" : "=a" (ret), "=m" (*_ts) :
64	"0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) :
65	"rcx", "r11");
66
67	return ret;
68	}
69
70	static __always_inline
71	long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
72	struct timezone *_tz)
73	{
74	long ret;
75
76	asm("syscall" : "=a" (ret) :
77	"0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory");
78
79	return ret;
80	}
81
82	static __always_inline
83	long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
84	{
85	long ret;
86
87	asm ("syscall" : "=a" (ret), "=m" (*_ts) :
88	"0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) :
89	"rcx", "r11");
90
91	return ret;
92	}
93
94	#else
95
96	static __always_inline
97	long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
98	{
99	long ret;
100
101	asm (
102	"mov %%ebx, %%edx \n"
103	"mov %[clock], %%ebx \n"
104	"call __kernel_vsyscall \n"
105	"mov %%edx, %%ebx \n"
106	: "=a" (ret), "=m" (*_ts)
107	: "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts)
108	: "edx");
109
110	return ret;
111	}
112
113	static __always_inline
114	long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
115	{
116	long ret;
117
118	asm (
119	"mov %%ebx, %%edx \n"
120	"mov %[clock], %%ebx \n"
121	"call __kernel_vsyscall \n"
122	"mov %%edx, %%ebx \n"
123	: "=a" (ret), "=m" (*_ts)
124	: "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts)
125	: "edx");
126
127	return ret;
128	}
129
130	static __always_inline
131	long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
132	struct timezone *_tz)
133	{
134	long ret;
135
136	asm(
137	"mov %%ebx, %%edx \n"
138	"mov %2, %%ebx \n"
139	"call __kernel_vsyscall \n"
140	"mov %%edx, %%ebx \n"
141	: "=a" (ret)
142	: "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz)
143	: "memory", "edx");
144
145	return ret;
146	}
147
148	static __always_inline long
149	clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
150	{
151	long ret;
152
153	asm (
154	"mov %%ebx, %%edx \n"
155	"mov %[clock], %%ebx \n"
156	"call __kernel_vsyscall \n"
157	"mov %%edx, %%ebx \n"
158	: "=a" (ret), "=m" (*_ts)
159	: "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts)
160	: "edx");
161
162	return ret;
163	}
164
165	static __always_inline
166	long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
167	{
168	long ret;
169
170	asm (
171	"mov %%ebx, %%edx \n"
172	"mov %[clock], %%ebx \n"
173	"call __kernel_vsyscall \n"
174	"mov %%edx, %%ebx \n"
175	: "=a" (ret), "=m" (*_ts)
176	: "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts)
177	: "edx");
178
179	return ret;
180	}
181
182	#endif
183
184	#ifdef CONFIG_PARAVIRT_CLOCK
185	static u64 vread_pvclock(void)
186	{
187	const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti;
188	u32 version;
189	u64 ret;
190
191	/*
192	* Note: The kernel and hypervisor must guarantee that cpu ID
193	* number maps 1:1 to per-CPU pvclock time info.
194	*
195	* Because the hypervisor is entirely unaware of guest userspace
196	* preemption, it cannot guarantee that per-CPU pvclock time
197	* info is updated if the underlying CPU changes or that that
198	* version is increased whenever underlying CPU changes.
199	*
200	* On KVM, we are guaranteed that pvti updates for any vCPU are
201	* atomic as seen by all vCPUs. This is an even stronger
202	* guarantee than we get with a normal seqlock.
203	*
204	* On Xen, we don't appear to have that guarantee, but Xen still
205	* supplies a valid seqlock using the version field.
206	*
207	* We only do pvclock vdso timing at all if
208	* PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
209	* mean that all vCPUs have matching pvti and that the TSC is
210	* synced, so we can just look at vCPU 0's pvti.
211	*/
212
213	do {
214	version = pvclock_read_begin(src: pvti);
215
216	if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
217	return U64_MAX;
218
219	ret = __pvclock_read_cycles(src: pvti, tsc: rdtsc_ordered());
220	} while (pvclock_read_retry(src: pvti, version));
221
222	return ret & S64_MAX;
223	}
224	#endif
225
226	#ifdef CONFIG_HYPERV_TIMER
227	static u64 vread_hvclock(void)
228	{
229	u64 tsc, time;
230
231	if (hv_read_tsc_page_tsc(&hvclock_page, &tsc, &time))
232	return time & S64_MAX;
233
234	return U64_MAX;
235	}
236	#endif
237
238	static inline u64 __arch_get_hw_counter(s32 clock_mode,
239	const struct vdso_time_data *vd)
240	{
241	if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
242	return (u64)rdtsc_ordered() & S64_MAX;
243	/*
244	* For any memory-mapped vclock type, we need to make sure that gcc
245	* doesn't cleverly hoist a load before the mode check. Otherwise we
246	* might end up touching the memory-mapped page even if the vclock in
247	* question isn't enabled, which will segfault. Hence the barriers.
248	*/
249	#ifdef CONFIG_PARAVIRT_CLOCK
250	if (clock_mode == VDSO_CLOCKMODE_PVCLOCK) {
251	barrier();
252	return vread_pvclock();
253	}
254	#endif
255	#ifdef CONFIG_HYPERV_TIMER
256	if (clock_mode == VDSO_CLOCKMODE_HVCLOCK) {
257	barrier();
258	return vread_hvclock();
259	}
260	#endif
261	return U64_MAX;
262	}
263
264	static inline bool arch_vdso_clocksource_ok(const struct vdso_clock *vc)
265	{
266	return true;
267	}
268	#define vdso_clocksource_ok arch_vdso_clocksource_ok
269
270	/*
271	* Clocksource read value validation to handle PV and HyperV clocksources
272	* which can be invalidated asynchronously and indicate invalidation by
273	* returning U64_MAX, which can be effectively tested by checking for a
274	* negative value after casting it to s64.
275	*
276	* This effectively forces a S64_MAX mask on the calculations, unlike the
277	* U64_MAX mask normally used by x86 clocksources.
278	*/
279	static inline bool arch_vdso_cycles_ok(u64 cycles)
280	{
281	return (s64)cycles >= `0`;
282	}
283	#define vdso_cycles_ok arch_vdso_cycles_ok
284
285	/*
286	* x86 specific calculation of nanoseconds for the current cycle count
287	*
288	* The regular implementation assumes that clocksource reads are globally
289	* monotonic. The TSC can be slightly off across sockets which can cause
290	* the regular delta calculation (@cycles - @last) to return a huge time
291	* jump.
292	*
293	* Therefore it needs to be verified that @cycles are greater than
294	* @vd->cycles_last. If not then use @vd->cycles_last, which is the base
295	* time of the current conversion period.
296	*
297	* This variant also uses a custom mask because while the clocksource mask of
298	* all the VDSO capable clocksources on x86 is U64_MAX, the above code uses
299	* U64_MASK as an exception value, additionally arch_vdso_cycles_ok() above
300	* declares everything with the MSB/Sign-bit set as invalid. Therefore the
301	* effective mask is S64_MAX.
302	*/
303	static __always_inline u64 vdso_calc_ns(const struct vdso_clock *vc, u64 cycles, u64 base)
304	{
305	u64 delta = cycles - vc->cycle_last;
306
307	/*
308	* Negative motion and deltas which can cause multiplication
309	* overflow require special treatment. This check covers both as
310	* negative motion is guaranteed to be greater than @vc::max_cycles
311	* due to unsigned comparison.
312	*
313	* Due to the MSB/Sign-bit being used as invalid marker (see
314	* arch_vdso_cycles_ok() above), the effective mask is S64_MAX, but that
315	* case is also unlikely and will also take the unlikely path here.
316	*/
317	if (unlikely(delta > vc->max_cycles)) {
318	/*
319	* Due to the above mentioned TSC wobbles, filter out
320	* negative motion. Per the above masking, the effective
321	* sign bit is now bit 62.
322	*/
323	if (delta & (`1ULL` << `62`))
324	return base >> vc->shift;
325
326	/ Handle multiplication overflow gracefully /
327	return mul_u64_u32_add_u64_shr(a: delta & S64_MAX, mul: vc->mult, b: base, shift: vc->shift);
328	}
329
330	return ((delta * vc->mult) + base) >> vc->shift;
331	}
332	#define vdso_calc_ns vdso_calc_ns
333
334	#endif /* !__ASSEMBLER__ */
335
336	#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
337

Browse the source code of Linux/arch/x86/include/asm/vdso/gettimeofday.h