/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */

/*
 * This structure provides a vDSO-style clock to VM guests, exposing the
 * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch
 * counter, etc.) and real time. It is designed to address the problem of
 * live migration, which other clock enlightenments do not.
 *
 * When a guest is live migrated, this affects the clock in two ways.
 *
 * First, even between identical hosts the actual frequency of the underlying
 * counter will change within the tolerances of its specification (typically
 * ±50PPM, or 4 seconds a day). This frequency also varies over time on the
 * same host, but can be tracked by NTP as it generally varies slowly. With
 * live migration there is a step change in the frequency, with no warning.
 *
 * Second, there may be a step change in the value of the counter itself, as
 * its accuracy is limited by the precision of the NTP synchronization on the
 * source and destination hosts.
 *
 * So any calibration (NTP, PTP, etc.) which the guest has done on the source
 * host before migration is invalid, and needs to be redone on the new host.
 *
 * In its most basic mode, this structure provides only an indication to the
 * guest that live migration has occurred. This allows the guest to know that
 * its clock is invalid and take remedial action. For applications that need
 * reliable accurate timestamps (e.g. distributed databases), the structure
 * can be mapped all the way to userspace. This allows the application to see
 * directly for itself that the clock is disrupted and take appropriate
 * action, even when using a vDSO-style method to get the time instead of a
 * system call.
 *
 * In its more advanced mode, this structure can also be used to expose the
 * precise relationship of the CPU counter to real time, as calibrated by the
 * host. This means that userspace applications can have accurate time
 * immediately after live migration, rather than having to pause operations
 * and wait for NTP to recover. This mode does, of course, rely on the
 * counter being reliable and consistent across CPUs.
 *
 * Note that this must be true UTC, never with smeared leap seconds. If a
 * guest wishes to construct a smeared clock, it can do so. Presenting a
 * smeared clock through this interface would be problematic because it
 * actually messes with the apparent counter *period*. A linear smearing
 * of 1 ms per second would effectively tweak the counter period by 1000PPM
 * at the start/end of the smearing period, while a sinusoidal smear would
 * basically be impossible to represent.
 *
 * This structure is offered with the intent that it be adopted into the
 * nascent virtio-rtc standard, as a virtio-rtc that does not address the live
 * migration problem seems a little less than fit for purpose. For that
 * reason, certain fields use precisely the same numeric definitions as in
 * the virtio-rtc proposal. The structure can also be exposed through an ACPI
 * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for
 * the fact that it uses a real _CRS to convey the address of the structure
 * (which should be a full page, to allow for mapping directly to userspace).
 */

#ifndef __VMCLOCK_ABI_H__
#define __VMCLOCK_ABI_H__

#include <linux/types.h>

struct vmclock_abi {
	/* CONSTANT FIELDS */
	__le32 magic;
#define VMCLOCK_MAGIC	0x4b4c4356 /* "VCLK" */
	__le32 size;		/* Size of region containing this structure */
	__le16 version;		/* 1 */
	__u8 counter_id;	/* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */
#define VMCLOCK_COUNTER_ARM_VCNT	0
#define VMCLOCK_COUNTER_X86_TSC		1
#define VMCLOCK_COUNTER_INVALID		0xff
	__u8 time_type;		/* Matches VIRTIO_RTC_TYPE_xxx */
#define VMCLOCK_TIME_UTC			0	/* Since 1970-01-01 00:00:00z */
#define VMCLOCK_TIME_TAI			1	/* Since 1970-01-01 00:00:00z */
#define VMCLOCK_TIME_MONOTONIC			2	/* Since undefined epoch */
#define VMCLOCK_TIME_INVALID_SMEARED		3	/* Not supported */
#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED	4	/* Not supported */

	/* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */
	__le32 seq_count;	/* Low bit means an update is in progress */
	/*
	 * This field changes to another non-repeating value when the CPU
	 * counter is disrupted, for example on live migration. This lets
	 * the guest know that it should discard any calibration it has
	 * performed of the counter against external sources (NTP/PTP/etc.).
	 */
	__le64 disruption_marker;
	__le64 flags;
	/* Indicates that the tai_offset_sec field is valid */
#define VMCLOCK_FLAG_TAI_OFFSET_VALID		(1 << 0)
	/*
	 * Optionally used to notify guests of pending maintenance events.
	 * A guest which provides latency-sensitive services may wish to
	 * remove itself from service if an event is coming up. Two flags
	 * indicate the approximate imminence of the event.
	 */
#define VMCLOCK_FLAG_DISRUPTION_SOON		(1 << 1) /* About a day */
#define VMCLOCK_FLAG_DISRUPTION_IMMINENT	(1 << 2) /* About an hour */
#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID	(1 << 3)
#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID	(1 << 4)
#define VMCLOCK_FLAG_TIME_ESTERROR_VALID	(1 << 5)
#define VMCLOCK_FLAG_TIME_MAXERROR_VALID	(1 << 6)
	/*
	 * If the MONOTONIC flag is set then (other than leap seconds) it is
	 * guaranteed that the time calculated according to this structure at
	 * any given moment shall never appear to be later than the time
	 * calculated via the structure at any *later* moment.
	 *
	 * In particular, a timestamp based on a counter reading taken
	 * immediately after setting the low bit of seq_count (and the
	 * associated memory barrier), using the previously-valid time and
	 * period fields, shall never be later than a timestamp based on
	 * a counter reading taken immediately before *clearing* the low
	 * bit again after the update, using the about-to-be-valid fields.
	 */
#define VMCLOCK_FLAG_TIME_MONOTONIC		(1 << 7)

	__u8 pad[2];
	__u8 clock_status;
#define VMCLOCK_STATUS_UNKNOWN		0
#define VMCLOCK_STATUS_INITIALIZING	1
#define VMCLOCK_STATUS_SYNCHRONIZED	2
#define VMCLOCK_STATUS_FREERUNNING	3
#define VMCLOCK_STATUS_UNRELIABLE	4

	/*
	 * The time exposed through this device is never smeared. This field
	 * corresponds to the 'subtype' field in virtio-rtc, which indicates
	 * the smearing method. However in this case it provides a *hint* to
	 * the guest operating system, such that *if* the guest OS wants to
	 * provide its users with an alternative clock which does not follow
	 * UTC, it may do so in a fashion consistent with the other systems
	 * in the nearby environment.
	 */
	__u8 leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */
#define VMCLOCK_SMEARING_STRICT		0
#define VMCLOCK_SMEARING_NOON_LINEAR	1
#define VMCLOCK_SMEARING_UTC_SLS	2
	__le16 tai_offset_sec; /* Actually two's complement signed */
	__u8 leap_indicator;
	/*
	 * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined
	 * in the current draft of virtio-rtc, but since smearing cannot be
	 * used with the shared memory device, some values are not used.
	 *
	 * The _POST_POS and _POST_NEG values allow the guest to perform
	 * its own smearing during the day or so after a leap second when
	 * such smearing may need to continue being applied for a leap
	 * second which is now theoretically "historical".
	 */
#define VMCLOCK_LEAP_NONE	0x00	/* No known nearby leap second */
#define VMCLOCK_LEAP_PRE_POS	0x01	/* Positive leap second at EOM */
#define VMCLOCK_LEAP_PRE_NEG	0x02	/* Negative leap second at EOM */
#define VMCLOCK_LEAP_POS	0x03	/* Set during 23:59:60 second */
#define VMCLOCK_LEAP_POST_POS	0x04
#define VMCLOCK_LEAP_POST_NEG	0x05

	/* Bit shift for counter_period_frac_sec and its error rate */
	__u8 counter_period_shift;
	/*
	 * Paired values of counter and UTC at a given point in time.
	 */
	__le64 counter_value;
	/*
	 * Counter period, and error margin of same. The unit of these
	 * fields is 1/2^(64 + counter_period_shift) of a second.
	 */
	__le64 counter_period_frac_sec;
	__le64 counter_period_esterror_rate_frac_sec;
	__le64 counter_period_maxerror_rate_frac_sec;

	/*
	 * Time according to time_type field above.
	 */
	__le64 time_sec;		/* Seconds since time_type epoch */
	__le64 time_frac_sec;		/* Units of 1/2^64 of a second */
	__le64 time_esterror_nanosec;
	__le64 time_maxerror_nanosec;
};

#endif /* __VMCLOCK_ABI_H__ */