linux/include/vdso/datapage.h

143 lines
4.1 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __VDSO_DATAPAGE_H
#define __VDSO_DATAPAGE_H
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
#include <uapi/linux/time.h>
#include <uapi/linux/types.h>
#include <uapi/asm-generic/errno-base.h>
#include <vdso/bits.h>
#include <vdso/clocksource.h>
#include <vdso/ktime.h>
#include <vdso/limits.h>
#include <vdso/math64.h>
#include <vdso/processor.h>
#include <vdso/time.h>
#include <vdso/time32.h>
#include <vdso/time64.h>
#ifdef CONFIG_ARCH_HAS_VDSO_DATA
#include <asm/vdso/data.h>
#else
struct arch_vdso_data {};
#endif
#define VDSO_BASES (CLOCK_TAI + 1)
#define VDSO_HRES (BIT(CLOCK_REALTIME) | \
BIT(CLOCK_MONOTONIC) | \
BIT(CLOCK_BOOTTIME) | \
BIT(CLOCK_TAI))
#define VDSO_COARSE (BIT(CLOCK_REALTIME_COARSE) | \
BIT(CLOCK_MONOTONIC_COARSE))
#define VDSO_RAW (BIT(CLOCK_MONOTONIC_RAW))
#define CS_HRES_COARSE 0
#define CS_RAW 1
#define CS_BASES (CS_RAW + 1)
/**
* struct vdso_timestamp - basetime per clock_id
* @sec: seconds
* @nsec: nanoseconds
*
* There is one vdso_timestamp object in vvar for each vDSO-accelerated
* clock_id. For high-resolution clocks, this encodes the time
* corresponding to vdso_data.cycle_last. For coarse clocks this encodes
* the actual time.
*
* To be noticed that for highres clocks nsec is left-shifted by
* vdso_data.cs[x].shift.
*/
struct vdso_timestamp {
u64 sec;
u64 nsec;
};
/**
* struct vdso_data - vdso datapage representation
* @seq: timebase sequence counter
* @clock_mode: clock mode
* @cycle_last: timebase at clocksource init
* @mask: clocksource mask
* @mult: clocksource multiplier
* @shift: clocksource shift
* @basetime[clock_id]: basetime per clock_id
lib/vdso: Prepare for time namespace support To support time namespaces in the vdso with a minimal impact on regular non time namespace affected tasks, the namespace handling needs to be hidden in a slow path. The most obvious place is vdso_seq_begin(). If a task belongs to a time namespace then the VVAR page which contains the system wide vdso data is replaced with a namespace specific page which has the same layout as the VVAR page. That page has vdso_data->seq set to 1 to enforce the slow path and vdso_data->clock_mode set to VCLOCK_TIMENS to enforce the time namespace handling path. The extra check in the case that vdso_data->seq is odd, e.g. a concurrent update of the vdso data is in progress, is not really affecting regular tasks which are not part of a time namespace as the task is spin waiting for the update to finish and vdso_data->seq to become even again. If a time namespace task hits that code path, it invokes the corresponding time getter function which retrieves the real VVAR page, reads host time and then adds the offset for the requested clock which is stored in the special VVAR page. If VDSO time namespace support is disabled the whole magic is compiled out. Initial testing shows that the disabled case is almost identical to the host case which does not take the slow timens path. With the special timens page installed the performance hit is constant time and in the range of 5-7%. For the vdso functions which are not using the sequence count an unconditional check for vdso_data->clock_mode is added which switches to the real vdso when the clock_mode is VCLOCK_TIMENS. [avagin: Make do_hres_timens() work with raw clocks too: choose vdso_data pointer by CS_RAW offset.] Suggested-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrei Vagin <avagin@gmail.com> Signed-off-by: Dmitry Safonov <dima@arista.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/r/20191112012724.250792-21-dima@arista.com
2019-11-12 09:27:09 +08:00
* @offset[clock_id]: time namespace offset per clock_id
* @tz_minuteswest: minutes west of Greenwich
* @tz_dsttime: type of DST correction
* @hrtimer_res: hrtimer resolution
* @__unused: unused
* @arch_data: architecture specific data (optional, defaults
* to an empty struct)
*
* vdso_data will be accessed by 64 bit and compat code at the same time
* so we should be careful before modifying this structure.
lib/vdso: Prepare for time namespace support To support time namespaces in the vdso with a minimal impact on regular non time namespace affected tasks, the namespace handling needs to be hidden in a slow path. The most obvious place is vdso_seq_begin(). If a task belongs to a time namespace then the VVAR page which contains the system wide vdso data is replaced with a namespace specific page which has the same layout as the VVAR page. That page has vdso_data->seq set to 1 to enforce the slow path and vdso_data->clock_mode set to VCLOCK_TIMENS to enforce the time namespace handling path. The extra check in the case that vdso_data->seq is odd, e.g. a concurrent update of the vdso data is in progress, is not really affecting regular tasks which are not part of a time namespace as the task is spin waiting for the update to finish and vdso_data->seq to become even again. If a time namespace task hits that code path, it invokes the corresponding time getter function which retrieves the real VVAR page, reads host time and then adds the offset for the requested clock which is stored in the special VVAR page. If VDSO time namespace support is disabled the whole magic is compiled out. Initial testing shows that the disabled case is almost identical to the host case which does not take the slow timens path. With the special timens page installed the performance hit is constant time and in the range of 5-7%. For the vdso functions which are not using the sequence count an unconditional check for vdso_data->clock_mode is added which switches to the real vdso when the clock_mode is VCLOCK_TIMENS. [avagin: Make do_hres_timens() work with raw clocks too: choose vdso_data pointer by CS_RAW offset.] Suggested-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrei Vagin <avagin@gmail.com> Signed-off-by: Dmitry Safonov <dima@arista.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/r/20191112012724.250792-21-dima@arista.com
2019-11-12 09:27:09 +08:00
*
* @basetime is used to store the base time for the system wide time getter
* VVAR page.
*
* @offset is used by the special time namespace VVAR pages which are
* installed instead of the real VVAR page. These namespace pages must set
* @seq to 1 and @clock_mode to VDSO_CLOCKMODE_TIMENS to force the code into
* the time namespace slow path. The namespace aware functions retrieve the
lib/vdso: Prepare for time namespace support To support time namespaces in the vdso with a minimal impact on regular non time namespace affected tasks, the namespace handling needs to be hidden in a slow path. The most obvious place is vdso_seq_begin(). If a task belongs to a time namespace then the VVAR page which contains the system wide vdso data is replaced with a namespace specific page which has the same layout as the VVAR page. That page has vdso_data->seq set to 1 to enforce the slow path and vdso_data->clock_mode set to VCLOCK_TIMENS to enforce the time namespace handling path. The extra check in the case that vdso_data->seq is odd, e.g. a concurrent update of the vdso data is in progress, is not really affecting regular tasks which are not part of a time namespace as the task is spin waiting for the update to finish and vdso_data->seq to become even again. If a time namespace task hits that code path, it invokes the corresponding time getter function which retrieves the real VVAR page, reads host time and then adds the offset for the requested clock which is stored in the special VVAR page. If VDSO time namespace support is disabled the whole magic is compiled out. Initial testing shows that the disabled case is almost identical to the host case which does not take the slow timens path. With the special timens page installed the performance hit is constant time and in the range of 5-7%. For the vdso functions which are not using the sequence count an unconditional check for vdso_data->clock_mode is added which switches to the real vdso when the clock_mode is VCLOCK_TIMENS. [avagin: Make do_hres_timens() work with raw clocks too: choose vdso_data pointer by CS_RAW offset.] Suggested-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrei Vagin <avagin@gmail.com> Signed-off-by: Dmitry Safonov <dima@arista.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/r/20191112012724.250792-21-dima@arista.com
2019-11-12 09:27:09 +08:00
* real system wide VVAR page, read host time and add the per clock offset.
* For clocks which are not affected by time namespace adjustment the
* offset must be zero.
*/
struct vdso_data {
u32 seq;
s32 clock_mode;
u64 cycle_last;
u64 mask;
u32 mult;
u32 shift;
lib/vdso: Prepare for time namespace support To support time namespaces in the vdso with a minimal impact on regular non time namespace affected tasks, the namespace handling needs to be hidden in a slow path. The most obvious place is vdso_seq_begin(). If a task belongs to a time namespace then the VVAR page which contains the system wide vdso data is replaced with a namespace specific page which has the same layout as the VVAR page. That page has vdso_data->seq set to 1 to enforce the slow path and vdso_data->clock_mode set to VCLOCK_TIMENS to enforce the time namespace handling path. The extra check in the case that vdso_data->seq is odd, e.g. a concurrent update of the vdso data is in progress, is not really affecting regular tasks which are not part of a time namespace as the task is spin waiting for the update to finish and vdso_data->seq to become even again. If a time namespace task hits that code path, it invokes the corresponding time getter function which retrieves the real VVAR page, reads host time and then adds the offset for the requested clock which is stored in the special VVAR page. If VDSO time namespace support is disabled the whole magic is compiled out. Initial testing shows that the disabled case is almost identical to the host case which does not take the slow timens path. With the special timens page installed the performance hit is constant time and in the range of 5-7%. For the vdso functions which are not using the sequence count an unconditional check for vdso_data->clock_mode is added which switches to the real vdso when the clock_mode is VCLOCK_TIMENS. [avagin: Make do_hres_timens() work with raw clocks too: choose vdso_data pointer by CS_RAW offset.] Suggested-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrei Vagin <avagin@gmail.com> Signed-off-by: Dmitry Safonov <dima@arista.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/r/20191112012724.250792-21-dima@arista.com
2019-11-12 09:27:09 +08:00
union {
struct vdso_timestamp basetime[VDSO_BASES];
struct timens_offset offset[VDSO_BASES];
};
s32 tz_minuteswest;
s32 tz_dsttime;
u32 hrtimer_res;
u32 __unused;
struct arch_vdso_data arch_data;
};
/*
* We use the hidden visibility to prevent the compiler from generating a GOT
* relocation. Not only is going through a GOT useless (the entry couldn't and
* must not be overridden by another library), it does not even work: the linker
* cannot generate an absolute address to the data page.
*
* With the hidden visibility, the compiler simply generates a PC-relative
* relocation, and this is what we need.
*/
extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden")));
arm64/vdso: Add time namespace page Allocate the time namespace page among VVAR pages. Provide __arch_get_timens_vdso_data() helper for VDSO code to get the code-relative position of VVARs on that special page. If a task belongs to a time namespace then the VVAR page which contains the system wide VDSO data is replaced with a namespace specific page which has the same layout as the VVAR page. That page has vdso_data->seq set to 1 to enforce the slow path and vdso_data->clock_mode set to VCLOCK_TIMENS to enforce the time namespace handling path. The extra check in the case that vdso_data->seq is odd, e.g. a concurrent update of the VDSO data is in progress, is not really affecting regular tasks which are not part of a time namespace as the task is spin waiting for the update to finish and vdso_data->seq to become even again. If a time namespace task hits that code path, it invokes the corresponding time getter function which retrieves the real VVAR page, reads host time and then adds the offset for the requested clock which is stored in the special VVAR page. The time-namespace page isn't allocated on !CONFIG_TIME_NAMESPACE, but vma is the same size, which simplifies criu/vdso migration between different kernel configs. Signed-off-by: Andrei Vagin <avagin@gmail.com> Reviewed-by: Vincenzo Frascino <vincenzo.frascino@arm.com> Reviewed-by: Dmitry Safonov <dima@arista.com> Cc: Mark Rutland <mark.rutland@arm.com> Link: https://lore.kernel.org/r/20200624083321.144975-4-avagin@gmail.com Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2020-06-24 16:33:18 +08:00
extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden")));
/*
* The generic vDSO implementation requires that gettimeofday.h
* provides:
* - __arch_get_vdso_data(): to get the vdso datapage.
* - __arch_get_hw_counter(): to get the hw counter based on the
* clock_mode.
* - gettimeofday_fallback(): fallback for gettimeofday.
* - clock_gettime_fallback(): fallback for clock_gettime.
* - clock_getres_fallback(): fallback for clock_getres.
*/
#ifdef ENABLE_COMPAT_VDSO
#include <asm/vdso/compat_gettimeofday.h>
#else
#include <asm/vdso/gettimeofday.h>
#endif /* ENABLE_COMPAT_VDSO */
#endif /* !__ASSEMBLY__ */
#endif /* __VDSO_DATAPAGE_H */