| 1 | /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ | 
|---|
| 2 |  | 
|---|
| 3 | /* | 
|---|
| 4 | * This structure provides a vDSO-style clock to VM guests, exposing the | 
|---|
| 5 | * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch | 
|---|
| 6 | * counter, etc.) and real time. It is designed to address the problem of | 
|---|
| 7 | * live migration, which other clock enlightenments do not. | 
|---|
| 8 | * | 
|---|
| 9 | * When a guest is live migrated, this affects the clock in two ways. | 
|---|
| 10 | * | 
|---|
| 11 | * First, even between identical hosts the actual frequency of the underlying | 
|---|
| 12 | * counter will change within the tolerances of its specification (typically | 
|---|
| 13 | * ±50PPM, or 4 seconds a day). This frequency also varies over time on the | 
|---|
| 14 | * same host, but can be tracked by NTP as it generally varies slowly. With | 
|---|
| 15 | * live migration there is a step change in the frequency, with no warning. | 
|---|
| 16 | * | 
|---|
| 17 | * Second, there may be a step change in the value of the counter itself, as | 
|---|
| 18 | * its accuracy is limited by the precision of the NTP synchronization on the | 
|---|
| 19 | * source and destination hosts. | 
|---|
| 20 | * | 
|---|
| 21 | * So any calibration (NTP, PTP, etc.) which the guest has done on the source | 
|---|
| 22 | * host before migration is invalid, and needs to be redone on the new host. | 
|---|
| 23 | * | 
|---|
| 24 | * In its most basic mode, this structure provides only an indication to the | 
|---|
| 25 | * guest that live migration has occurred. This allows the guest to know that | 
|---|
| 26 | * its clock is invalid and take remedial action. For applications that need | 
|---|
| 27 | * reliable accurate timestamps (e.g. distributed databases), the structure | 
|---|
| 28 | * can be mapped all the way to userspace. This allows the application to see | 
|---|
| 29 | * directly for itself that the clock is disrupted and take appropriate | 
|---|
| 30 | * action, even when using a vDSO-style method to get the time instead of a | 
|---|
| 31 | * system call. | 
|---|
| 32 | * | 
|---|
| 33 | * In its more advanced mode, this structure can also be used to expose the | 
|---|
| 34 | * precise relationship of the CPU counter to real time, as calibrated by the | 
|---|
| 35 | * host. This means that userspace applications can have accurate time | 
|---|
| 36 | * immediately after live migration, rather than having to pause operations | 
|---|
| 37 | * and wait for NTP to recover. This mode does, of course, rely on the | 
|---|
| 38 | * counter being reliable and consistent across CPUs. | 
|---|
| 39 | * | 
|---|
| 40 | * Note that this must be true UTC, never with smeared leap seconds. If a | 
|---|
| 41 | * guest wishes to construct a smeared clock, it can do so. Presenting a | 
|---|
| 42 | * smeared clock through this interface would be problematic because it | 
|---|
| 43 | * actually messes with the apparent counter *period*. A linear smearing | 
|---|
| 44 | * of 1 ms per second would effectively tweak the counter period by 1000PPM | 
|---|
| 45 | * at the start/end of the smearing period, while a sinusoidal smear would | 
|---|
| 46 | * basically be impossible to represent. | 
|---|
| 47 | * | 
|---|
| 48 | * This structure is offered with the intent that it be adopted into the | 
|---|
| 49 | * nascent virtio-rtc standard, as a virtio-rtc that does not address the live | 
|---|
| 50 | * migration problem seems a little less than fit for purpose. For that | 
|---|
| 51 | * reason, certain fields use precisely the same numeric definitions as in | 
|---|
| 52 | * the virtio-rtc proposal. The structure can also be exposed through an ACPI | 
|---|
| 53 | * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for | 
|---|
| 54 | * the fact that it uses a real _CRS to convey the address of the structure | 
|---|
| 55 | * (which should be a full page, to allow for mapping directly to userspace). | 
|---|
| 56 | */ | 
|---|
| 57 |  | 
|---|
| 58 | #ifndef __VMCLOCK_ABI_H__ | 
|---|
| 59 | #define __VMCLOCK_ABI_H__ | 
|---|
| 60 |  | 
|---|
| 61 | #include <linux/types.h> | 
|---|
| 62 |  | 
|---|
/*
 * struct vmclock_abi - clock information shared with the guest.
 *
 * Multi-byte fields use the little-endian __le16/__le32/__le64 types. With
 * those types at their nominal widths every member is naturally aligned, so
 * the layout has no implicit padding. Do not reorder, resize or insert
 * members: the layout is the interface.
 *
 * The constants a field may hold are #defined directly beneath that field.
 */
struct vmclock_abi {
	/*
	 * Constant fields.
	 */

	/* Structure signature; the value reads "VCLK" when stored little-endian. */
	__le32 magic;
#define VMCLOCK_MAGIC	0x4b4c4356

	/* Size of the region containing this structure. */
	__le32 size;

	/* Structure version: 1. */
	__le16 version;

	/* Counter type. Matches VIRTIO_RTC_COUNTER_xxx, except for INVALID. */
	__u8 counter_id;
#define VMCLOCK_COUNTER_ARM_VCNT	0
#define VMCLOCK_COUNTER_X86_TSC		1
#define VMCLOCK_COUNTER_INVALID		0xff

	/*
	 * Kind of time reported by the time_* fields at the end of the
	 * structure. Matches VIRTIO_RTC_TYPE_xxx.
	 */
	__u8 time_type;
#define VMCLOCK_TIME_UTC			0	/* Since 1970-01-01 00:00:00z */
#define VMCLOCK_TIME_TAI			1	/* Since 1970-01-01 00:00:00z */
#define VMCLOCK_TIME_MONOTONIC			2	/* Since undefined epoch */
#define VMCLOCK_TIME_INVALID_SMEARED		3	/* Not supported */
#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED	4	/* Not supported */

	/*
	 * Non-constant fields, protected by the seqcount lock.
	 */

	/* Sequence counter; the low bit being set means an update is in progress. */
	__le32 seq_count;

	/*
	 * Changes to another, non-repeating value whenever the CPU counter
	 * is disrupted (live migration, for example). A guest seeing a new
	 * value should throw away any calibration of the counter it has
	 * made against external sources (NTP/PTP/etc.).
	 */
	__le64 disruption_marker;

	/* Bit mask of the VMCLOCK_FLAG_xxx values below. */
	__le64 flags;

	/* The tai_offset_sec field is valid. */
#define VMCLOCK_FLAG_TAI_OFFSET_VALID		(1 << 0)

	/*
	 * Optional notification of pending maintenance events. A guest
	 * providing latency-sensitive services may wish to take itself out
	 * of service when an event is coming up. The two flags give the
	 * approximate imminence of the event.
	 */
#define VMCLOCK_FLAG_DISRUPTION_SOON		(1 << 1) /* About a day */
#define VMCLOCK_FLAG_DISRUPTION_IMMINENT	(1 << 2) /* About an hour */

	/*
	 * NOTE(review): presumably, by analogy with TAI_OFFSET_VALID, each
	 * of these marks the correspondingly named counter_period_*error*
	 * or time_*error* field as valid -- confirm.
	 */
#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID	(1 << 3)
#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID	(1 << 4)
#define VMCLOCK_FLAG_TIME_ESTERROR_VALID	(1 << 5)
#define VMCLOCK_FLAG_TIME_MAXERROR_VALID	(1 << 6)

	/*
	 * With the MONOTONIC flag set it is guaranteed that, leap seconds
	 * aside, the time calculated according to this structure at any
	 * given moment never appears to be later than the time calculated
	 * via the structure at any *later* moment.
	 *
	 * In particular: take one timestamp from a counter reading made
	 * immediately after the low bit of seq_count is set (and the
	 * associated memory barrier), using the previously-valid time and
	 * period fields. Take another from a counter reading made
	 * immediately before the low bit is *cleared* again after the
	 * update, using the about-to-be-valid fields. The first shall never
	 * be later than the second.
	 */
#define VMCLOCK_FLAG_TIME_MONOTONIC		(1 << 7)

	/* Padding. */
	__u8 pad[2];

	/* Clock state; one of VMCLOCK_STATUS_xxx. */
	__u8 clock_status;
#define VMCLOCK_STATUS_UNKNOWN		0
#define VMCLOCK_STATUS_INITIALIZING	1
#define VMCLOCK_STATUS_SYNCHRONIZED	2
#define VMCLOCK_STATUS_FREERUNNING	3
#define VMCLOCK_STATUS_UNRELIABLE	4

	/*
	 * Counterpart of the virtio-rtc 'subtype' field, which names the
	 * smearing method; matches VIRTIO_RTC_SUBTYPE_xxx. The time exposed
	 * through this device is never smeared, so here the value is only
	 * a *hint* to the guest operating system: *if* the guest OS wants
	 * to offer its users an alternative clock which does not follow
	 * UTC, it may do so in a fashion consistent with the other systems
	 * in the nearby environment.
	 */
	__u8 leap_second_smearing_hint;
#define VMCLOCK_SMEARING_STRICT		0
#define VMCLOCK_SMEARING_NOON_LINEAR	1
#define VMCLOCK_SMEARING_UTC_SLS	2

	/*
	 * TAI offset in seconds. Declared unsigned, but actually a two's
	 * complement signed value. Valid only when
	 * VMCLOCK_FLAG_TAI_OFFSET_VALID is set in flags.
	 */
	__le16 tai_offset_sec;

	/*
	 * Based on the VIRTIO_RTC_LEAP_xxx values as defined in the current
	 * draft of virtio-rtc. Smearing cannot be used with the shared
	 * memory device, so some of those values are not used here.
	 *
	 * The _POST_POS and _POST_NEG values let the guest perform its own
	 * smearing during the day or so after a leap second, when such
	 * smearing may need to continue being applied for a leap second
	 * which is now theoretically "historical".
	 */
	__u8 leap_indicator;
#define VMCLOCK_LEAP_NONE	0x00	/* No known nearby leap second */
#define VMCLOCK_LEAP_PRE_POS	0x01	/* Positive leap second at EOM */
#define VMCLOCK_LEAP_PRE_NEG	0x02	/* Negative leap second at EOM */
#define VMCLOCK_LEAP_POS	0x03	/* Set during 23:59:60 second */
#define VMCLOCK_LEAP_POST_POS	0x04	/* See _POST_ note above */
#define VMCLOCK_LEAP_POST_NEG	0x05	/* See _POST_ note above */

	/* Bit shift for counter_period_frac_sec and its error rate. */
	__u8 counter_period_shift;

	/*
	 * Paired values of counter and UTC at a given point in time: this
	 * is the counter half of the pair.
	 */
	__le64 counter_value;

	/*
	 * Counter period and the error margin of same, in units of
	 * 1/2^(64 + counter_period_shift) of a second.
	 */
	__le64 counter_period_frac_sec;
	__le64 counter_period_esterror_rate_frac_sec;
	__le64 counter_period_maxerror_rate_frac_sec;

	/*
	 * Time, according to the time_type field above.
	 */
	__le64 time_sec;		/* Seconds since time_type epoch */
	__le64 time_frac_sec;		/* Units of 1/2^64 of a second */
	/* Error bounds on that time; nanoseconds, going by the field names. */
	__le64 time_esterror_nanosec;
	__le64 time_maxerror_nanosec;
};
|---|
| 181 |  | 
|---|
| 182 | #endif /*  __VMCLOCK_ABI_H__ */ | 
|---|
| 183 |  | 
|---|