From 82ccdf062a64f3c4ac575c16179ce68edbbbe8e4 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 22 Mar 2024 15:04:41 +0800 Subject: [PATCH 01/33] hrtimer: Remove unused function The function is defined, but not called anywhere: kernel/time/hrtimer.c:1880:20: warning: unused function '__hrtimer_peek_ahead_timers'. Remove it. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240322070441.29646-1-jiapeng.chong@linux.alibaba.com Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=8611 --- kernel/time/hrtimer.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 70625dff62ce..cae9d04b5584 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1875,25 +1875,7 @@ retry: tick_program_event(expires_next, 1); pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); } - -/* called with interrupts disabled */ -static inline void __hrtimer_peek_ahead_timers(void) -{ - struct tick_device *td; - - if (!hrtimer_hres_active()) - return; - - td = this_cpu_ptr(&tick_cpu_device); - if (td && td->evtdev) - hrtimer_interrupt(td->evtdev); -} - -#else /* CONFIG_HIGH_RES_TIMERS */ - -static inline void __hrtimer_peek_ahead_timers(void) { } - -#endif /* !CONFIG_HIGH_RES_TIMERS */ +#endif /* !CONFIG_HIGH_RES_TIMERS */ /* * Called from run_local_timers in hardirq context every jiffy From c8e3a8b6f2e62661d838ae222774121ae23777a4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:05 +0200 Subject: [PATCH 02/33] vdso: Consolidate vdso_calc_delta() Consolidate vdso_calc_delta(), in preparation for further simplification. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-2-adrian.hunter@intel.com --- arch/powerpc/include/asm/vdso/gettimeofday.h | 26 +++++++++----------- arch/s390/include/asm/vdso/gettimeofday.h | 7 ++---- lib/vdso/gettimeofday.c | 9 ++++++- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 78302f6c2580..c6390890a60c 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -13,6 +13,17 @@ #define VDSO_HAS_TIME 1 +/* + * powerpc specific delta calculation. + * + * This variant removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. + */ +#define VDSO_DELTA_NOMASK 1 + static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3, const unsigned long _r4) { @@ -104,21 +115,6 @@ static inline bool vdso_clocksource_ok(const struct vdso_data *vd) } #define vdso_clocksource_ok vdso_clocksource_ok -/* - * powerpc specific delta calculation. - * - * This variant removes the masking of the subtraction because the - * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX - * which would result in a pointless operation. The compiler cannot - * optimize it away as the mask comes from the vdso data and is not compile - * time constant. 
- */ -static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) -{ - return (cycles - last) * mult; -} -#define vdso_calc_delta vdso_calc_delta - #ifndef __powerpc64__ static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift) { diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index db84942eb78f..7937765ccfa5 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -6,16 +6,13 @@ #define VDSO_HAS_CLOCK_GETRES 1 +#define VDSO_DELTA_NOMASK 1 + #include #include #include #include -#define vdso_calc_delta __arch_vdso_calc_delta -static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) -{ - return (cycles - last) * mult; -} static __always_inline const struct vdso_data *__arch_get_vdso_data(void) { diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index ce2f69552003..faccf12f7c03 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -6,6 +6,13 @@ #include #ifndef vdso_calc_delta + +#ifdef VDSO_DELTA_NOMASK +# define VDSO_DELTA_MASK(mask) U64_MAX +#else +# define VDSO_DELTA_MASK(mask) (mask) +#endif + /* * Default implementation which works for all sane clocksources. That * obviously excludes x86/TSC. @@ -13,7 +20,7 @@ static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) { - return ((cycles - last) & mask) * mult; + return ((cycles - last) & VDSO_DELTA_MASK(mask)) * mult; } #endif From 5b26ef660a690e424d9548fdf0565d4172d5d88f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:06 +0200 Subject: [PATCH 03/33] vdso: Consolidate nanoseconds calculation Consolidate nanoseconds calculation to simplify and reduce code duplication. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-3-adrian.hunter@intel.com --- arch/x86/include/asm/vdso/gettimeofday.h | 17 +++++---- lib/vdso/gettimeofday.c | 45 +++++++++++------------- 2 files changed, 28 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 8e048ca980df..5727dedd3549 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -300,7 +300,7 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) #define vdso_cycles_ok arch_vdso_cycles_ok /* - * x86 specific delta calculation. + * x86 specific calculation of nanoseconds for the current cycle count * * The regular implementation assumes that clocksource reads are globally * monotonic. The TSC can be slightly off across sockets which can cause @@ -308,8 +308,8 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) * jump. * * Therefore it needs to be verified that @cycles are greater than - * @last. If not then use @last, which is the base time of the current - * conversion period. + * @vd->cycles_last. If not then use @vd->cycles_last, which is the base + * time of the current conversion period. * * This variant also uses a custom mask because while the clocksource mask of * all the VDSO capable clocksources on x86 is U64_MAX, the above code uses @@ -317,25 +317,24 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) * declares everything with the MSB/Sign-bit set as invalid. Therefore the * effective mask is S64_MAX. 
*/ -static __always_inline -u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base) { /* * Due to the MSB/Sign-bit being used as invalid marker (see * arch_vdso_cycles_valid() above), the effective mask is S64_MAX. */ - u64 delta = (cycles - last) & S64_MAX; + u64 delta = (cycles - vd->cycle_last) & S64_MAX; /* * Due to the above mentioned TSC wobbles, filter out negative motion. * Per the above masking, the effective sign bit is now bit 62. */ if (unlikely(delta & (1ULL << 62))) - return 0; + return base >> vd->shift; - return delta * mult; + return ((delta * vd->mult) + base) >> vd->shift; } -#define vdso_calc_delta vdso_calc_delta +#define vdso_calc_ns vdso_calc_ns #endif /* !__ASSEMBLY__ */ diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index faccf12f7c03..9fa90e0794c9 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -5,23 +5,12 @@ #include #include -#ifndef vdso_calc_delta +#ifndef vdso_calc_ns #ifdef VDSO_DELTA_NOMASK -# define VDSO_DELTA_MASK(mask) U64_MAX +# define VDSO_DELTA_MASK(vd) U64_MAX #else -# define VDSO_DELTA_MASK(mask) (mask) -#endif - -/* - * Default implementation which works for all sane clocksources. That - * obviously excludes x86/TSC. - */ -static __always_inline -u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) -{ - return ((cycles - last) & VDSO_DELTA_MASK(mask)) * mult; -} +# define VDSO_DELTA_MASK(vd) (vd->mask) #endif #ifndef vdso_shift_ns @@ -31,6 +20,18 @@ static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift) } #endif +/* + * Default implementation which works for all sane clocksources. That + * obviously excludes x86/TSC. + */ +static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base) +{ + u64 delta = (cycles - vd->cycle_last) & VDSO_DELTA_MASK(vd); + + return vdso_shift_ns((delta * vd->mult) + base, vd->shift); +} +#endif /* vdso_calc_ns */ + #ifndef __arch_vdso_hres_capable static inline bool __arch_vdso_hres_capable(void) { @@ -56,10 +57,10 @@ static inline bool vdso_cycles_ok(u64 cycles) static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, struct __kernel_timespec *ts) { - const struct vdso_data *vd; const struct timens_offset *offs = &vdns->offset[clk]; const struct vdso_timestamp *vdso_ts; - u64 cycles, last, ns; + const struct vdso_data *vd; + u64 cycles, ns; u32 seq; s64 sec; @@ -80,10 +81,7 @@ static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_ cycles = __arch_get_hw_counter(vd->clock_mode, vd); if (unlikely(!vdso_cycles_ok(cycles))) return -1; - ns = vdso_ts->nsec; - last = vd->cycle_last; - ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); - ns = vdso_shift_ns(ns, vd->shift); + ns = vdso_calc_ns(vd, cycles, vdso_ts->nsec); sec = vdso_ts->sec; } while (unlikely(vdso_read_retry(vd, seq))); @@ -118,7 +116,7 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; - u64 cycles, last, sec, ns; + u64 cycles, sec, ns; u32 seq; /* Allows to compile the high resolution parts out */ @@ -151,10 +149,7 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, cycles = __arch_get_hw_counter(vd->clock_mode, vd); if (unlikely(!vdso_cycles_ok(cycles))) return -1; - ns = vdso_ts->nsec; - last = vd->cycle_last; - ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); - ns = 
vdso_shift_ns(ns, vd->shift); + ns = vdso_calc_ns(vd, cycles, vdso_ts->nsec); sec = vdso_ts->sec; } while (unlikely(vdso_read_retry(vd, seq))); From 0c68458b0a5878d735572b4f4d91219a1db7c784 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:07 +0200 Subject: [PATCH 04/33] vdso: Add CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT Add CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT in preparation to add multiplication overflow protection to the VDSO time getter functions. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-4-adrian.hunter@intel.com --- lib/vdso/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index d883ac299508..c46c2300517c 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -30,4 +30,11 @@ config GENERIC_VDSO_TIME_NS Selected by architectures which support time namespaces in the VDSO +config GENERIC_VDSO_OVERFLOW_PROTECT + bool + help + Select to add multiplication overflow protection to the VDSO + time getter functions for the price of an extra conditional + in the hotpath. + endif From 5e5e51422cd189bc1b627f619f0f99324e6e4de9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:08 +0200 Subject: [PATCH 05/33] math64: Tidy up mul_u64_u32_shr() Put together declaration and initialization of local variables. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-5-adrian.hunter@intel.com --- include/linux/math64.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/math64.h b/include/linux/math64.h index bf74478926d4..fd13622b2056 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -179,16 +179,12 @@ static __always_inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift) #ifndef mul_u64_u32_shr static __always_inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) { - u32 ah, al; + u32 ah = a >> 32, al = a; u64 ret; - al = a; - ah = a >> 32; - ret = mul_u32_u32(al, mul) >> shift; if (ah) ret += mul_u32_u32(ah, mul) << (32 - shift); - return ret; } #endif /* mul_u64_u32_shr */ From 1beb35ec615f676d49d68b6dc23c7418ba8ff145 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:09 +0200 Subject: [PATCH 06/33] vdso, math64: Provide mul_u64_u32_add_u64_shr() Provide mul_u64_u32_add_u64_shr() which is a calculation that will be used by timekeeping and VDSO. Place #include after #include to allow architecture-specific overrides, at least for the kernel. 
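As a usage sketch (hypothetical variable names, mirroring how the later VDSO and
timekeeping patches in this series call it), mul_u64_u32_add_u64_shr() evaluates
((a * mul) + b) >> shift without the 64-bit intermediate product wrapping:

	/* Naive form: the intermediate delta * mult can exceed 64 bits */
	ns = ((delta * mult) + base_ns) >> shift;

	/* Overflow-safe form provided by this patch; same result whenever
	 * the naive form does not overflow */
	ns = mul_u64_u32_add_u64_shr(delta, mult, base_ns, shift);
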
Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-6-adrian.hunter@intel.com --- include/linux/math64.h | 2 +- include/vdso/math64.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/include/linux/math64.h b/include/linux/math64.h index fd13622b2056..d34def7f9a8c 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -4,8 +4,8 @@ #include #include -#include #include +#include #if BITS_PER_LONG == 64 diff --git a/include/vdso/math64.h b/include/vdso/math64.h index 7da703ee5561..22ae212f8b28 100644 --- a/include/vdso/math64.h +++ b/include/vdso/math64.h @@ -21,4 +21,42 @@ __iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) return ret; } +#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) + +#ifndef mul_u64_u32_add_u64_shr +static __always_inline u64 mul_u64_u32_add_u64_shr(u64 a, u32 mul, u64 b, unsigned int shift) +{ + return (u64)((((unsigned __int128)a * mul) + b) >> shift); +} +#endif /* mul_u64_u32_add_u64_shr */ + +#else + +#ifndef mul_u64_u32_add_u64_shr +#ifndef mul_u32_u32 +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return (u64)a * b; +} +#define mul_u32_u32 mul_u32_u32 +#endif +static __always_inline u64 mul_u64_u32_add_u64_shr(u64 a, u32 mul, u64 b, unsigned int shift) +{ + u32 ah = a >> 32, al = a; + bool ovf; + u64 ret; + + ovf = __builtin_add_overflow(mul_u32_u32(al, mul), b, &ret); + ret >>= shift; + if (ovf && shift) + ret += 1ULL << (64 - shift); + if (ah) + ret += mul_u32_u32(ah, mul) << (32 - shift); + + return ret; +} +#endif /* mul_u64_u32_add_u64_shr */ + +#endif + #endif /* __VDSO_MATH64_H */ From d2e58ab5cda2a225c406ac10d0a8b960bc5a39b6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:10 +0200 Subject: [PATCH 07/33] vdso: Add vdso_data:: Max_cycles Add vdso_data::max_cycles in preparation to use it to detect potential multiplication overflow. 
Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-7-adrian.hunter@intel.com --- include/vdso/datapage.h | 4 ++++ kernel/time/vsyscall.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index c71ddb6d4691..d04d394db064 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -61,6 +61,7 @@ struct vdso_timestamp { * @seq: timebase sequence counter * @clock_mode: clock mode * @cycle_last: timebase at clocksource init + * @max_cycles: maximum cycles which won't overflow 64bit multiplication * @mask: clocksource mask * @mult: clocksource multiplier * @shift: clocksource shift @@ -92,6 +93,9 @@ struct vdso_data { s32 clock_mode; u64 cycle_last; +#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT + u64 max_cycles; +#endif u64 mask; u32 mult; u32 shift; diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index f0d5062d9cbc..9193d6133e5d 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -22,10 +22,16 @@ static inline void update_vdso_data(struct vdso_data *vdata, u64 nsec, sec; vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; +#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT + vdata[CS_HRES_COARSE].max_cycles = tk->tkr_mono.clock->max_cycles; +#endif vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask; vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult; vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift; vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last; +#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT + vdata[CS_RAW].max_cycles = tk->tkr_raw.clock->max_cycles; +#endif vdata[CS_RAW].mask = tk->tkr_raw.mask; vdata[CS_RAW].mult = tk->tkr_raw.mult; vdata[CS_RAW].shift = tk->tkr_raw.shift; From 456e3788bc7164c1c8298045e04068b8e3d8e413 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:11 +0200 Subject: [PATCH 08/33] vdso: Make delta calculation overflow safe Kernel timekeeping is designed to keep the change in cycles (since the last timer interrupt) below max_cycles, which prevents multiplication overflow when converting cycles to nanoseconds. However, if timer interrupts stop, the calculation will eventually overflow. Add protection against that, enabled by config option CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT. Check against max_cycles, falling back to a slower higher precision calculation. 
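To put a rough number on "eventually" (illustrative arithmetic only, assuming a
1 GHz clocksource with shift = 24 and therefore mult = 2^24): the plain 64-bit
product delta * mult wraps once delta exceeds 2^64 / 2^24 = 2^40 cycles, which
at 1 GHz is about 2^40 / 10^9 ~= 1100 seconds, i.e. roughly 18 minutes without
a timer interrupt. The max_cycles check catches such deltas and routes them
through the slower but exact mul_u64_u32_add_u64_shr() path.
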
Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-8-adrian.hunter@intel.com --- lib/vdso/gettimeofday.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 9fa90e0794c9..9c3a8d2440c9 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -13,6 +13,18 @@ # define VDSO_DELTA_MASK(vd) (vd->mask) #endif +#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT +static __always_inline bool vdso_delta_ok(const struct vdso_data *vd, u64 delta) +{ + return delta < vd->max_cycles; +} +#else +static __always_inline bool vdso_delta_ok(const struct vdso_data *vd, u64 delta) +{ + return true; +} +#endif + #ifndef vdso_shift_ns static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift) { @@ -28,7 +40,10 @@ static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, { u64 delta = (cycles - vd->cycle_last) & VDSO_DELTA_MASK(vd); - return vdso_shift_ns((delta * vd->mult) + base, vd->shift); + if (likely(vdso_delta_ok(vd, delta))) + return vdso_shift_ns((delta * vd->mult) + base, vd->shift); + + return mul_u64_u32_add_u64_shr(delta, vd->mult, base, vd->shift); } #endif /* vdso_calc_ns */ From 7e90ffb716d289b3b82fb41892bb52a11bdadfd9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:12 +0200 Subject: [PATCH 09/33] x86/vdso: Make delta calculation overflow safe Kernel timekeeping is designed to keep the change in cycles (since the last timer interrupt) below max_cycles, which prevents multiplication overflow when converting cycles to nanoseconds. However, if timer interrupts stop, the calculation will eventually overflow. Add protection against that. Select GENERIC_VDSO_OVERFLOW_PROTECT so that max_cycles is made available in the VDSO data page. Check against max_cycles, falling back to a slower higher precision calculation. Take advantage of the opportunity to move masking and negative motion check into the slow path. The result is a calculation that has similar performance as before. Newer machines showed performance benefit, whereas older Skylake-based hardware such as Intel Kaby Lake was seen <1% worse. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-9-adrian.hunter@intel.com --- arch/x86/Kconfig | 1 + arch/x86/include/asm/vdso/gettimeofday.h | 31 +++++++++++++++++------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4fff6ed46e90..4e251ba3bad2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -168,6 +168,7 @@ config X86 select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY select GENERIC_VDSO_TIME_NS + select GENERIC_VDSO_OVERFLOW_PROTECT select GUP_GET_PXX_LOW_HIGH if X86_PAE select HARDIRQS_SW_RESEND select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 5727dedd3549..0ef36190abe6 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -319,18 +319,31 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) */ static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base) { - /* - * Due to the MSB/Sign-bit being used as invalid marker (see - * arch_vdso_cycles_valid() above), the effective mask is S64_MAX. 
- */ - u64 delta = (cycles - vd->cycle_last) & S64_MAX; + u64 delta = cycles - vd->cycle_last; /* - * Due to the above mentioned TSC wobbles, filter out negative motion. - * Per the above masking, the effective sign bit is now bit 62. + * Negative motion and deltas which can cause multiplication + * overflow require special treatment. This check covers both as + * negative motion is guaranteed to be greater than @vd::max_cycles + * due to unsigned comparison. + * + * Due to the MSB/Sign-bit being used as invalid marker (see + * arch_vdso_cycles_valid() above), the effective mask is S64_MAX, + * but that case is also unlikely and will also take the unlikely path + * here. */ - if (unlikely(delta & (1ULL << 62))) - return base >> vd->shift; + if (unlikely(delta > vd->max_cycles)) { + /* + * Due to the above mentioned TSC wobbles, filter out + * negative motion. Per the above masking, the effective + * sign bit is now bit 62. + */ + if (delta & (1ULL << 62)) + return base >> vd->shift; + + /* Handle multiplication overflow gracefully */ + return mul_u64_u32_add_u64_shr(delta & S64_MAX, vd->mult, base, vd->shift); + } return ((delta * vd->mult) + base) >> vd->shift; } From e98ab3d4159e6bab4e391f376a1e548dd4d32524 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:13 +0200 Subject: [PATCH 10/33] timekeeping: Move timekeeping helper functions Move timekeeping helper functions to prepare for their reuse. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-10-adrian.hunter@intel.com --- kernel/time/timekeeping.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b58dffc58a8f..3375f0a6400f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -381,14 +381,6 @@ static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 de return nsec; } -static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) -{ - u64 delta; - - delta = timekeeping_get_delta(tkr); - return timekeeping_delta_to_ns(tkr, delta); -} - static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles) { u64 delta; @@ -398,6 +390,22 @@ static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 c return timekeeping_delta_to_ns(tkr, delta); } +static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr) +{ + u64 delta, cycles = tk_clock_read(tkr); + + delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); + return timekeeping_delta_to_ns(tkr, delta); +} + +static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) +{ + u64 delta; + + delta = timekeeping_get_delta(tkr); + return timekeeping_delta_to_ns(tkr, delta); +} + /** * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. 
* @tkr: Timekeeping readout base from which we take the update @@ -431,14 +439,6 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr, memcpy(base + 1, base, sizeof(*base)); } -static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr) -{ - u64 delta, cycles = tk_clock_read(tkr); - - delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); - return timekeeping_delta_to_ns(tkr, delta); -} - static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) { struct tk_read_base *tkr; From a729a63c6b2ebd8bc37646519d404f005ea8f1b2 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:14 +0200 Subject: [PATCH 11/33] timekeeping: Rename fast_tk_get_delta_ns() to __timekeeping_get_ns() Rename fast_tk_get_delta_ns() to __timekeeping_get_ns() to prepare for its reuse as a general timekeeping helper function. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-11-adrian.hunter@intel.com --- kernel/time/timekeeping.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3375f0a6400f..63061332a75c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -390,7 +390,7 @@ static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 c return timekeeping_delta_to_ns(tkr, delta); } -static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr) +static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr) { u64 delta, cycles = tk_clock_read(tkr); @@ -449,7 +449,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) seq = raw_read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base); - now += fast_tk_get_delta_ns(tkr); + now += __timekeeping_get_ns(tkr); } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); return now; @@ -565,7 +565,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono) tkr = tkf->base + (seq & 0x01); basem = ktime_to_ns(tkr->base); baser = ktime_to_ns(tkr->base_real); - delta = fast_tk_get_delta_ns(tkr); + delta = __timekeeping_get_ns(tkr); } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); if (mono) From 9af4548e828aa2ea66f54433c5747f64124a6240 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:15 +0200 Subject: [PATCH 12/33] timekeeping: Tidy timekeeping_cycles_to_ns() slightly Put together declaration and initialization of the local variable 'delta'. 
Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-12-adrian.hunter@intel.com --- kernel/time/timekeeping.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 63061332a75c..c698219b152d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -383,10 +383,9 @@ static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 de static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles) { - u64 delta; + /* Calculate the delta since the last update_wall_time() */ + u64 delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); - /* calculate the delta since the last update_wall_time */ - delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); return timekeeping_delta_to_ns(tkr, delta); } From 670be12ba8f5d20ee2fb0531be6977005cd62401 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:16 +0200 Subject: [PATCH 13/33] timekeeping: Reuse timekeeping_cycles_to_ns() Simplify __timekeeping_get_ns() by reusing timekeeping_cycles_to_ns(). No functional change. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-13-adrian.hunter@intel.com --- kernel/time/timekeeping.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c698219b152d..f81d675291e0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -391,10 +391,7 @@ static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 c static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr) { - u64 delta, cycles = tk_clock_read(tkr); - - delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); - return timekeeping_delta_to_ns(tkr, delta); + return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr)); } static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) From e8e9d21a5df655a62ab4611fd437fb7510d2f85c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:17 +0200 Subject: [PATCH 14/33] timekeeping: Refactor timekeeping helpers Simplify the usage of timekeeping sanity checking, in preparation for consolidating timekeeping helpers. This works towards eliminating timekeeping_delta_to_ns() in favour of timekeeping_cycles_to_ns(). No functional change. 
Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-14-adrian.hunter@intel.com --- kernel/time/timekeeping.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f81d675291e0..618328cd4bc4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -237,7 +237,7 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset) } } -static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr) +static inline u64 timekeeping_debug_get_delta(const struct tk_read_base *tkr) { struct timekeeper *tk = &tk_core.timekeeper; u64 now, last, mask, max, delta; @@ -281,17 +281,9 @@ static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr) static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset) { } -static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr) +static inline u64 timekeeping_debug_get_delta(const struct tk_read_base *tkr) { - u64 cycle_now, delta; - - /* read clocksource */ - cycle_now = tk_clock_read(tkr); - - /* calculate the delta since the last update_wall_time */ - delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); - - return delta; + BUG(); } #endif @@ -396,10 +388,10 @@ static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr) static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) { - u64 delta; + if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING)) + return timekeeping_delta_to_ns(tkr, timekeeping_debug_get_delta(tkr)); - delta = timekeeping_get_delta(tkr); - return timekeeping_delta_to_ns(tkr, delta); + return __timekeeping_get_ns(tkr); } /** From e84f43e34faf85816587f80594541ec978449d6e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:18 +0200 Subject: [PATCH 15/33] timekeeping: Consolidate timekeeping helpers Consolidate timekeeping helpers, making use of timekeeping_cycles_to_ns() in preference to directly using timekeeping_delta_to_ns(). No functional change. 
Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-15-adrian.hunter@intel.com --- kernel/time/timekeeping.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 618328cd4bc4..1bbfe1ff8d24 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -237,7 +237,9 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset) } } -static inline u64 timekeeping_debug_get_delta(const struct tk_read_base *tkr) +static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles); + +static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr) { struct timekeeper *tk = &tk_core.timekeeper; u64 now, last, mask, max, delta; @@ -266,22 +268,22 @@ static inline u64 timekeeping_debug_get_delta(const struct tk_read_base *tkr) */ if (unlikely((~delta & mask) < (mask >> 3))) { tk->underflow_seen = 1; - delta = 0; + now = last; } /* Cap delta value to the max_cycles values to avoid mult overflows */ if (unlikely(delta > max)) { tk->overflow_seen = 1; - delta = tkr->clock->max_cycles; + now = last + max; } - return delta; + return timekeeping_cycles_to_ns(tkr, now); } #else static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset) { } -static inline u64 timekeeping_debug_get_delta(const struct tk_read_base *tkr) +static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr) { BUG(); } @@ -389,7 +391,7 @@ static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr) static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) { if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING)) - return timekeeping_delta_to_ns(tkr, timekeeping_debug_get_delta(tkr)); + return timekeeping_debug_get_ns(tkr); return __timekeeping_get_ns(tkr); } From 3094c6db1cba0bbca6ea19c777762c26fee747d7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:19 +0200 Subject: [PATCH 16/33] timekeeping: Fold in timekeeping_delta_to_ns() timekeeping_delta_to_ns() is now called only from timekeeping_cycles_to_ns(), and it is not useful otherwise. Simplify the code by folding it into timekeeping_cycles_to_ns(). No functional change. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-16-adrian.hunter@intel.com --- kernel/time/timekeeping.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1bbfe1ff8d24..749387f22f0f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -364,23 +364,12 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) } /* Timekeeper helper functions. 
*/ - -static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 delta) -{ - u64 nsec; - - nsec = delta * tkr->mult + tkr->xtime_nsec; - nsec >>= tkr->shift; - - return nsec; -} - static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles) { /* Calculate the delta since the last update_wall_time() */ u64 delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); - return timekeeping_delta_to_ns(tkr, delta); + return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift; } static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr) From e809a80aa0bcf802f99407c23fd6be6fd4eb250a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:20 +0200 Subject: [PATCH 17/33] timekeeping: Prepare timekeeping_cycles_to_ns() for overflow safety Open code clocksource_delta() in timekeeping_cycles_to_ns() so that overflow safety can be added efficiently. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-17-adrian.hunter@intel.com --- kernel/time/timekeeping.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 749387f22f0f..d17484082e2c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -367,7 +367,17 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles) { /* Calculate the delta since the last update_wall_time() */ - u64 delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); + u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask; + + if (IS_ENABLED(CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE)) { + /* + * Handle clocksource inconsistency between CPUs to prevent + * time from going backwards by checking for the MSB of the + * mask being set in the delta. + */ + if (unlikely(delta & ~(mask >> 1))) + return tkr->xtime_nsec >> tkr->shift; + } return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift; } From fcf190c369149c3b04539797cedf28741eb14164 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:21 +0200 Subject: [PATCH 18/33] timekeeping: Make delta calculation overflow safe Kernel timekeeping is designed to keep the change in cycles (since the last timer interrupt) below max_cycles, which prevents multiplication overflow when converting cycles to nanoseconds. However, if timer interrupts stop, the calculation will eventually overflow. Add protection against that. In timekeeping_cycles_to_ns() calculation, check against max_cycles, falling back to a slower higher precision calculation. In timekeeping_forward_now(), process delta in chunks of at most max_cycles. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-18-adrian.hunter@intel.com --- kernel/time/timekeeping.c | 40 ++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d17484082e2c..111dfdbd488f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -364,19 +364,32 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) } /* Timekeeper helper functions. 
*/ +static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta) +{ + return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift); +} + static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles) { /* Calculate the delta since the last update_wall_time() */ u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask; - if (IS_ENABLED(CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE)) { - /* - * Handle clocksource inconsistency between CPUs to prevent - * time from going backwards by checking for the MSB of the - * mask being set in the delta. - */ - if (unlikely(delta & ~(mask >> 1))) - return tkr->xtime_nsec >> tkr->shift; + /* + * This detects the case where the delta overflows the multiplication + * with tkr->mult. + */ + if (unlikely(delta > tkr->clock->max_cycles)) { + if (IS_ENABLED(CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE)) { + /* + * Handle clocksource inconsistency between CPUs to prevent + * time from going backwards by checking for the MSB of the + * mask being set in the delta. + */ + if (unlikely(delta & ~(mask >> 1))) + return tkr->xtime_nsec >> tkr->shift; + } + + return delta_to_ns_safe(tkr, delta); } return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift; @@ -789,10 +802,15 @@ static void timekeeping_forward_now(struct timekeeper *tk) tk->tkr_mono.cycle_last = cycle_now; tk->tkr_raw.cycle_last = cycle_now; - tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult; - tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult; + while (delta > 0) { + u64 max = tk->tkr_mono.clock->max_cycles; + u64 incr = delta < max ? delta : max; - tk_normalize_xtime(tk); + tk->tkr_mono.xtime_nsec += incr * tk->tkr_mono.mult; + tk->tkr_raw.xtime_nsec += incr * tk->tkr_raw.mult; + tk_normalize_xtime(tk); + delta -= incr; + } } /** From 135225a363ae67bc90bde7a2cbbe1ea0f152ba22 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:22 +0200 Subject: [PATCH 19/33] timekeeping: Let timekeeping_cycles_to_ns() handle both under and overflow For the case !CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE, forego overflow protection in the range (mask << 1) < delta <= mask, and interpret it always as an inconsistency between CPU clock values. That allows slightly neater code, and it is on a slow path so has no effect on performance. Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-19-adrian.hunter@intel.com --- kernel/time/timekeeping.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 111dfdbd488f..4e18db1819f8 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -266,17 +266,14 @@ static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr) * Try to catch underflows by checking if we are seeing small * mask-relative negative values. 
*/ - if (unlikely((~delta & mask) < (mask >> 3))) { + if (unlikely((~delta & mask) < (mask >> 3))) tk->underflow_seen = 1; - now = last; - } - /* Cap delta value to the max_cycles values to avoid mult overflows */ - if (unlikely(delta > max)) { + /* Check for multiplication overflows */ + if (unlikely(delta > max)) tk->overflow_seen = 1; - now = last + max; - } + /* timekeeping_cycles_to_ns() handles both under and overflow */ return timekeeping_cycles_to_ns(tkr, now); } #else @@ -375,19 +372,17 @@ static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 c u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask; /* - * This detects the case where the delta overflows the multiplication - * with tkr->mult. + * This detects both negative motion and the case where the delta + * overflows the multiplication with tkr->mult. */ if (unlikely(delta > tkr->clock->max_cycles)) { - if (IS_ENABLED(CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE)) { - /* - * Handle clocksource inconsistency between CPUs to prevent - * time from going backwards by checking for the MSB of the - * mask being set in the delta. - */ - if (unlikely(delta & ~(mask >> 1))) - return tkr->xtime_nsec >> tkr->shift; - } + /* + * Handle clocksource inconsistency between CPUs to prevent + * time from going backwards by checking for the MSB of the + * mask being set in the delta. + */ + if (delta & ~(mask >> 1)) + return tkr->xtime_nsec >> tkr->shift; return delta_to_ns_safe(tkr, delta); } From d0304569fb019d1bcfbbbce1ce6df6b96f04079b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2024 08:40:23 +0200 Subject: [PATCH 20/33] clocksource: Make watchdog and suspend-timing multiplication overflow safe Kernel timekeeping is designed to keep the change in cycles (since the last timer interrupt) below max_cycles, which prevents multiplication overflow when converting cycles to nanoseconds. However, if timer interrupts stop, the clocksource_cyc2ns() calculation will eventually overflow. Add protection against that. Simplify by folding together clocksource_delta() and clocksource_cyc2ns() into cycles_to_nsec_safe(). Check against max_cycles, falling back to a slower higher precision calculation. 
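The fallback stays exact because of the identity behind mul_u64_u32_shr()
(sketch only; the generic implementation shown earlier in this series works the
same way). With delta split into 32-bit halves, delta = ah * 2^32 + al:

	(delta * mult) >> shift == ((ah * mult) << (32 - shift)) + ((al * mult) >> shift)

for shift <= 32. Each 32x32 product fits in 64 bits, so nothing wraps as long
as the resulting nanosecond value itself fits in 64 bits.
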
Suggested-by: Thomas Gleixner Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325064023.2997-20-adrian.hunter@intel.com --- kernel/time/clocksource.c | 42 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e5b260aa0e02..4d50d53ac719 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -20,6 +20,16 @@ #include "tick-internal.h" #include "timekeeping_internal.h" +static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end) +{ + u64 delta = clocksource_delta(end, start, cs->mask); + + if (likely(delta < cs->max_cycles)) + return clocksource_cyc2ns(delta, cs->mult, cs->shift); + + return mul_u64_u32_shr(delta, cs->mult, cs->shift); +} + /** * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks * @mult: pointer to mult variable @@ -222,8 +232,8 @@ enum wd_read_status { static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow) { unsigned int nretries, max_retries; - u64 wd_end, wd_end2, wd_delta; int64_t wd_delay, wd_seq_delay; + u64 wd_end, wd_end2; max_retries = clocksource_get_max_watchdog_retry(); for (nretries = 0; nretries <= max_retries; nretries++) { @@ -234,9 +244,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, wd_end2 = watchdog->read(watchdog); local_irq_enable(); - wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask); - wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, - watchdog->shift); + wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end); if (wd_delay <= WATCHDOG_MAX_SKEW) { if (nretries > 1 || nretries >= max_retries) { pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n", @@ -254,8 +262,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, * report system busy, reinit the watchdog and skip the current * watchdog test. 
*/ - wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask); - wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift); + wd_seq_delay = cycles_to_nsec_safe(watchdog, wd_end, wd_end2); if (wd_seq_delay > WATCHDOG_MAX_SKEW/2) goto skip_test; } @@ -366,8 +373,7 @@ void clocksource_verify_percpu(struct clocksource *cs) delta = (csnow_end - csnow_mid) & cs->mask; if (delta < 0) cpumask_set_cpu(cpu, &cpus_ahead); - delta = clocksource_delta(csnow_end, csnow_begin, cs->mask); - cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift); + cs_nsec = cycles_to_nsec_safe(cs, csnow_begin, csnow_end); if (cs_nsec > cs_nsec_max) cs_nsec_max = cs_nsec; if (cs_nsec < cs_nsec_min) @@ -398,8 +404,8 @@ static inline void clocksource_reset_watchdog(void) static void clocksource_watchdog(struct timer_list *unused) { - u64 csnow, wdnow, cslast, wdlast, delta; int64_t wd_nsec, cs_nsec, interval; + u64 csnow, wdnow, cslast, wdlast; int next_cpu, reset_pending; struct clocksource *cs; enum wd_read_status read_ret; @@ -456,12 +462,8 @@ static void clocksource_watchdog(struct timer_list *unused) continue; } - delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask); - wd_nsec = clocksource_cyc2ns(delta, watchdog->mult, - watchdog->shift); - - delta = clocksource_delta(csnow, cs->cs_last, cs->mask); - cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift); + wd_nsec = cycles_to_nsec_safe(watchdog, cs->wd_last, wdnow); + cs_nsec = cycles_to_nsec_safe(cs, cs->cs_last, csnow); wdlast = cs->wd_last; /* save these in case we print them */ cslast = cs->cs_last; cs->cs_last = csnow; @@ -832,7 +834,7 @@ void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles) */ u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now) { - u64 now, delta, nsec = 0; + u64 now, nsec = 0; if (!suspend_clocksource) return 0; @@ -847,12 +849,8 @@ u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now) else now = suspend_clocksource->read(suspend_clocksource); - if (now > suspend_start) { - delta = clocksource_delta(now, suspend_start, - suspend_clocksource->mask); - nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult, - suspend_clocksource->shift); - } + if (now > suspend_start) + nsec = cycles_to_nsec_safe(suspend_clocksource, suspend_start, now); /* * Disable the suspend timer to save power if current clocksource is From 8f0acb7f3a1331559e325566c00c26d1523dfe06 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 14 Mar 2024 18:04:01 +0800 Subject: [PATCH 21/33] clocksource: Convert s[n]printf() to sysfs_emit() Per filesystems/sysfs.rst, show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. coccinelle complains that there are still a couple of functions that use snprintf(). Convert them to sysfs_emit(). 
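For reference, the recommended show() pattern looks like this (illustrative
sketch with a made-up attribute and value, not taken from this patch).
sysfs_emit() formats at most PAGE_SIZE bytes into the sysfs buffer, which plain
snprintf() only does if the caller remembers to pass PAGE_SIZE:

	static ssize_t example_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
	{
		return sysfs_emit(buf, "%s\n", "example-value");
	}
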
Signed-off-by: Li Zhijian Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240314100402.1326582-1-lizhijian@fujitsu.com --- kernel/time/clocksource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 4d50d53ac719..d25ba49e313c 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -1334,7 +1334,7 @@ static ssize_t current_clocksource_show(struct device *dev, ssize_t count = 0; mutex_lock(&clocksource_mutex); - count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); + count = sysfs_emit(buf, "%s\n", curr_clocksource->name); mutex_unlock(&clocksource_mutex); return count; From 98fe0fcb326a923740cb8900aa7ed7fe538c984a Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 14 Mar 2024 18:04:02 +0800 Subject: [PATCH 22/33] clockevents: Convert s[n]printf() to sysfs_emit() Per filesystems/sysfs.rst, show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. coccinelle complains that there are still a couple of functions that use snprintf(). Convert them to sysfs_emit(). Signed-off-by: Li Zhijian Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240314100402.1326582-2-lizhijian@fujitsu.com --- kernel/time/clockevents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index a7ca458cdd9c..60a6484831b1 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -677,7 +677,7 @@ static ssize_t current_device_show(struct device *dev, raw_spin_lock_irq(&clockevents_lock); td = tick_get_tick_dev(dev); if (td && td->evtdev) - count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name); + count = sysfs_emit(buf, "%s\n", td->evtdev->name); raw_spin_unlock_irq(&clockevents_lock); return count; } From 8ff1e6c5aca5fd908e81c33c460c45f9555e1c22 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Apr 2024 09:26:39 +0300 Subject: [PATCH 23/33] vdso: Fix powerpc build U64_MAX undeclared error U64_MAX is not in include/vdso/limits.h, although that isn't noticed on x86 because x86 includes include/linux/limits.h indirectly. However powerpc is more selective, resulting in the following build error: In file included from : lib/vdso/gettimeofday.c: In function 'vdso_calc_ns': lib/vdso/gettimeofday.c:11:33: error: 'U64_MAX' undeclared 11 | # define VDSO_DELTA_MASK(vd) U64_MAX | ^~~~~~~ Use ULLONG_MAX instead which will work just as well and is in include/vdso/limits.h. Fixes: c8e3a8b6f2e6 ("vdso: Consolidate vdso_calc_delta()") Reported-by: Stephen Rothwell Signed-off-by: Adrian Hunter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240409062639.3393-1-adrian.hunter@intel.com Closes: https://lore.kernel.org/all/20240409124905.6816db37@canb.auug.org.au/ --- lib/vdso/gettimeofday.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 9c3a8d2440c9..899850bd6f0b 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -8,7 +8,7 @@ #ifndef vdso_calc_ns #ifdef VDSO_DELTA_NOMASK -# define VDSO_DELTA_MASK(vd) U64_MAX +# define VDSO_DELTA_MASK(vd) ULLONG_MAX #else # define VDSO_DELTA_MASK(vd) (vd->mask) #endif From 48b7f4d29ac8fcdc35a97ce38e4aecdee83b0e3f Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 22 Mar 2024 08:59:38 +0000 Subject: [PATCH 24/33] rust: time: Add Ktime Introduce a wrapper around `ktime_t` with a few different useful methods. 
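On the C side, the pattern these bindings wrap looks like this (illustrative
snippet, not part of this patch):

	ktime_t start = ktime_get();
	/* ... do work ... */
	s64 elapsed_ms = ktime_ms_delta(ktime_get(), start);
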
Rust Binder will use these bindings to compute how many milliseconds a transaction has been active for when dumping the current state of the Binder driver. This replicates the logic in C Binder [1]. For a usage example in Rust Binder, see [2]. ktime_get() cannot be safely called in NMI context. This requirement is not checked by these abstractions, but it is intended that klint [3] or a similar tool will be used to check it in the future. Signed-off-by: Alice Ryhl Signed-off-by: Thomas Gleixner Reviewed-by: Benno Lossin Reviewed-by: Boqun Feng Reviewed-by: Thomas Gleixner Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20240322-rust-ktime_ms_delta-v2-1-d98de1f7c282@google.com Link: https://lore.kernel.org/lkml/5ac8c0d09392290be789423f0dd78a520b830fab.1682333709.git.zhangchuang3@xiaomi.com/ [1] Link: https://r.android.com/3004103 [2] Link: https://rust-for-linux.com/klint [3] --- rust/kernel/time.rs | 60 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index 25a896eed468..6811d5cadbd4 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -5,6 +5,9 @@ //! This module contains the kernel APIs related to time and timers that //! have been ported or wrapped for usage by Rust code in the kernel. +/// The number of nanoseconds per millisecond. +pub const NSEC_PER_MSEC: i64 = bindings::NSEC_PER_MSEC as i64; + /// The time unit of Linux kernel. One jiffy equals (1/HZ) second. pub type Jiffies = core::ffi::c_ulong; @@ -18,3 +21,60 @@ pub fn msecs_to_jiffies(msecs: Msecs) -> Jiffies { // matter what the argument is. unsafe { bindings::__msecs_to_jiffies(msecs) } } + +/// A Rust wrapper around a `ktime_t`. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub struct Ktime { + inner: bindings::ktime_t, +} + +impl Ktime { + /// Create a `Ktime` from a raw `ktime_t`. + #[inline] + pub fn from_raw(inner: bindings::ktime_t) -> Self { + Self { inner } + } + + /// Get the current time using `CLOCK_MONOTONIC`. + #[inline] + pub fn ktime_get() -> Self { + // SAFETY: It is always safe to call `ktime_get` outside of NMI context. + Self::from_raw(unsafe { bindings::ktime_get() }) + } + + /// Divide the number of nanoseconds by a compile-time constant. + #[inline] + fn divns_constant(self) -> i64 { + self.to_ns() / DIV + } + + /// Returns the number of nanoseconds. + #[inline] + pub fn to_ns(self) -> i64 { + self.inner + } + + /// Returns the number of milliseconds. + #[inline] + pub fn to_ms(self) -> i64 { + self.divns_constant::() + } +} + +/// Returns the number of milliseconds between two ktimes. +#[inline] +pub fn ktime_ms_delta(later: Ktime, earlier: Ktime) -> i64 { + (later - earlier).to_ms() +} + +impl core::ops::Sub for Ktime { + type Output = Ktime; + + #[inline] + fn sub(self, other: Ktime) -> Ktime { + Self { + inner: self.inner - other.inner, + } + } +} From e84c60032a39e3267f0b46175d5368da33e214a6 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Wed, 17 Apr 2024 16:02:29 +0200 Subject: [PATCH 25/33] timerqueue: Remove never used function timerqueue_node_expires() This function was introduced with commit 60bda037f1dd ("posix-cpu-timers: Utilize timerqueue for storage") but never used. Remove it. 
Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240417140229.19633-1-anna-maria@linutronix.de --- include/linux/timerqueue.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index 62973f7d4610..d306d9dd2207 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -37,11 +37,6 @@ static inline bool timerqueue_node_queued(struct timerqueue_node *node) return !RB_EMPTY_NODE(&node->node); } -static inline bool timerqueue_node_expires(struct timerqueue_node *node) -{ - return node->expires; -} - static inline void timerqueue_init_head(struct timerqueue_head *head) { head->rb_root = RB_ROOT_CACHED; From b7c8e1f8a7b4352c1d0b4310686385e3cf6c104a Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 18 Apr 2024 10:30:00 +0800 Subject: [PATCH 26/33] hrtimer: Rename __hrtimer_hres_active() to hrtimer_hres_active() The function hrtimer_hres_active() are defined in the hrtimer.c file, but not called elsewhere, so rename __hrtimer_hres_active() to hrtimer_hres_active() and remove the old hrtimer_hres_active() function. kernel/time/hrtimer.c:653:19: warning: unused function 'hrtimer_hres_active'. Fixes: 82ccdf062a64 ("hrtimer: Remove unused function") Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Thomas Gleixner Reviewed-by: Anna-Maria Behnsen Link: https://lore.kernel.org/r/20240418023000.130324-1-jiapeng.chong@linux.alibaba.com Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=8778 --- kernel/time/hrtimer.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index cae9d04b5584..492c14aac642 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -644,17 +644,12 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) /* * Is the high resolution mode active ? */ -static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) +static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) { return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? cpu_base->hres_active : 0; } -static inline int hrtimer_hres_active(void) -{ - return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); -} - static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, struct hrtimer *next_timer, ktime_t expires_next) @@ -678,7 +673,7 @@ static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, * set. So we'd effectively block all timers until the T2 event * fires. */ - if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) + if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) return; tick_program_event(expires_next, 1); @@ -789,12 +784,12 @@ static void retrigger_next_event(void *arg) * function call will take care of the reprogramming in case the * CPU was in a NOHZ idle sleep. 
*/ - if (!__hrtimer_hres_active(base) && !tick_nohz_active) + if (!hrtimer_hres_active(base) && !tick_nohz_active) return; raw_spin_lock(&base->lock); hrtimer_update_base(base); - if (__hrtimer_hres_active(base)) + if (hrtimer_hres_active(base)) hrtimer_force_reprogram(base, 0); else hrtimer_update_next_event(base); @@ -951,7 +946,7 @@ void clock_was_set(unsigned int bases) cpumask_var_t mask; int cpu; - if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active) + if (!hrtimer_hres_active(cpu_base) && !tick_nohz_active) goto out_timerfd; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { @@ -1491,7 +1486,7 @@ u64 hrtimer_get_next_event(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); - if (!__hrtimer_hres_active(cpu_base)) + if (!hrtimer_hres_active(cpu_base)) expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -1514,7 +1509,7 @@ u64 hrtimer_next_event_without(const struct hrtimer *exclude) raw_spin_lock_irqsave(&cpu_base->lock, flags); - if (__hrtimer_hres_active(cpu_base)) { + if (hrtimer_hres_active(cpu_base)) { unsigned int active; if (!cpu_base->softirq_activated) { @@ -1886,7 +1881,7 @@ void hrtimer_run_queues(void) unsigned long flags; ktime_t now; - if (__hrtimer_hres_active(cpu_base)) + if (hrtimer_hres_active(cpu_base)) return; /* From 54db412e618e9c43e5167f809a901f554e8c43e2 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sun, 28 Apr 2024 12:21:43 +0200 Subject: [PATCH 27/33] clocksource: Make the int help prompt unit readable in ncurses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When doing make menuconfig and searching for the CLOCKSOURCE_WATCHDOG_MAX_SKEW_US config item, the help says: │ Symbol: CLOCKSOURCE_WATCHDOG_MAX_SKEW_US [=125] │ Type : integer │ Range : [50 1000] │ Defined at kernel/time/Kconfig:204 │ Prompt: Clocksource watchdog maximum allowable skew (in s) ^^^ │ Depends on: GENERIC_CLOCKEVENTS [=y] && CLOCKSOURCE_WATCHDOG [=y] because some terminals cannot display the 'μ' char, Unicode number 0x3bc. So simply write it out so that there's no trouble. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Thomas Gleixner Acked-by: Paul E. McKenney Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20240428102143.26764-1-bp@kernel.org --- kernel/time/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index fc3b1a06c981..8ebb6d5a106b 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -202,7 +202,7 @@ config HIGH_RES_TIMERS the size of the kernel image. config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US - int "Clocksource watchdog maximum allowable skew (in μs)" + int "Clocksource watchdog maximum allowable skew (in microseconds)" depends on CLOCKSOURCE_WATCHDOG range 50 1000 default 125 From ddd9120983c3efbcaa3a4c7777da1440f8ce27d8 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 11 Apr 2024 16:08:00 -0700 Subject: [PATCH 28/33] rust: time: doc: Add missing C header links The definitions related to jiffies are at linux/jiffies.h, and the definitions related to ktime_t are at linux/ktime.h. Since `kernel::time` provides the functionality dealing with jiffies and ktime_t, it makes sense to add links to them from Rust's time module.
Signed-off-by: Boqun Feng Signed-off-by: Thomas Gleixner Reviewed-by: Alice Ryhl Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20240411230801.1504496-2-boqun.feng@gmail.com --- rust/kernel/time.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index 6811d5cadbd4..e3bb5e89f88d 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -4,6 +4,9 @@ //! //! This module contains the kernel APIs related to time and timers that //! have been ported or wrapped for usage by Rust code in the kernel. +//! +//! C header: [`include/linux/jiffies.h`](srctree/include/linux/jiffies.h). +//! C header: [`include/linux/ktime.h`](srctree/include/linux/ktime.h). /// The number of nanoseconds per millisecond. pub const NSEC_PER_MSEC: i64 = bindings::NSEC_PER_MSEC as i64; From 6402eb802deb312e33c24699f68fb7775b2c7386 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Fri, 22 Mar 2024 15:12:18 +0000 Subject: [PATCH 29/33] dt-bindings: timer: renesas: ostm: Document Renesas RZ/V2H(P) SoC Document the General Timer Module (a.k.a OSTM) block on Renesas RZ/V2H(P) ("R9A09G057") SoC, which is identical to the one found on the RZ/A1H and RZ/G2L SoCs. Add the "renesas,r9a09g057-ostm" compatible string for the RZ/V2H(P) SoC. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Acked-by: Conor Dooley Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20240322151219.885832-2-prabhakar.mahadev-lad.rj@bp.renesas.com --- Documentation/devicetree/bindings/timer/renesas,ostm.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/timer/renesas,ostm.yaml b/Documentation/devicetree/bindings/timer/renesas,ostm.yaml index 8b06a681764e..e8c642166462 100644 --- a/Documentation/devicetree/bindings/timer/renesas,ostm.yaml +++ b/Documentation/devicetree/bindings/timer/renesas,ostm.yaml @@ -26,6 +26,7 @@ properties: - renesas,r9a07g043-ostm # RZ/G2UL and RZ/Five - renesas,r9a07g044-ostm # RZ/G2{L,LC} - renesas,r9a07g054-ostm # RZ/V2L + - renesas,r9a09g057-ostm # RZ/V2H(P) - const: renesas,ostm # Generic reg: @@ -58,6 +59,7 @@ if: - renesas,r9a07g043-ostm - renesas,r9a07g044-ostm - renesas,r9a07g054-ostm + - renesas,r9a09g057-ostm then: required: - resets From 0f63c95aebf11d87b166a5dfd389957c67fef9c0 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Fri, 22 Mar 2024 15:12:19 +0000 Subject: [PATCH 30/33] clocksource/drivers/renesas-ostm: Allow OSTM driver to reprobe for RZ/V2H(P) SoC The RZ/V2H(P) (R9A09G057) SoC is equipped with the Generic Timer Module, also known as OSTM. Similar to the RZ/G2L SoC, the OSTM on the RZ/V2H(P) SoC requires the reset line to be deasserted before accessing any registers. Early call to ostm_init() happens through TIMER_OF_DECLARE() which always fails with -EPROBE_DEFER, as resets are not available that early in the boot process. To address this issue on the RZ/V2H(P) SoC, enable the OSTM driver to be reprobed through the platform driver probe mechanism. 
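To make the two-stage probing described above easier to follow, here is a condensed, hypothetical sketch of the pattern; the compatible string, names and helpers are invented for illustration, error handling and cleanup are elided, and this is not the actual renesas-ostm code:

#include <linux/clocksource.h>
#include <linux/err.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/reset.h>

static int __init example_timer_init(struct device_node *np)
{
	struct reset_control *rstc;

	/*
	 * The early call made via TIMER_OF_DECLARE() runs before the reset
	 * controller has probed, so acquiring the reset line fails here,
	 * typically with -EPROBE_DEFER.
	 */
	rstc = of_reset_control_get_exclusive(np, NULL);
	if (IS_ERR(rstc))
		return PTR_ERR(rstc);

	reset_control_deassert(rstc);
	/* ... map registers, register clocksource and clock events ... */
	return 0;
}

TIMER_OF_DECLARE(example_timer, "vendor,example-timer", example_timer_init);

/* Second chance: the platform bus retries once the reset controller exists. */
static int __init example_timer_probe(struct platform_device *pdev)
{
	return example_timer_init(pdev->dev.of_node);
}

static const struct of_device_id example_timer_of_match[] = {
	{ .compatible = "vendor,example-timer" },
	{ /* sentinel */ }
};

static struct platform_driver example_timer_driver = {
	.driver = {
		.name = "example-timer",
		.of_match_table = example_timer_of_match,
	},
};
builtin_platform_driver_probe(example_timer_driver, example_timer_probe);

The actual change below is much smaller: it only widens the existing #ifdef so that this platform-driver path is also built for the RZ/V2H(P) SoC (CONFIG_ARCH_R9A09G057).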
Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20240322151219.885832-3-prabhakar.mahadev-lad.rj@bp.renesas.com --- drivers/clocksource/renesas-ostm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/renesas-ostm.c b/drivers/clocksource/renesas-ostm.c index 8da972dc1713..39487d05a009 100644 --- a/drivers/clocksource/renesas-ostm.c +++ b/drivers/clocksource/renesas-ostm.c @@ -224,7 +224,7 @@ err_free: TIMER_OF_DECLARE(ostm, "renesas,ostm", ostm_init); -#ifdef CONFIG_ARCH_RZG2L +#if defined(CONFIG_ARCH_RZG2L) || defined(CONFIG_ARCH_R9A09G057) static int __init ostm_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; From 37385c0772a4fc6b89605b9701fa934fa2beb2cc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Mar 2024 11:30:07 +0100 Subject: [PATCH 31/33] clocksource/drivers/renesas-ostm: Avoid reprobe after successful early probe The Renesas OS Timer (OSTM) driver contains two probe points, of which only one should complete: 1. Early probe, using TIMER_OF_DECLARE(), to provide the sole clocksource on (arm32) RZ/A1 and RZ/A2 SoCs, 2. Normal probe, using a platform driver, to provide additional timers on (arm64 + riscv) RZ/G2L and similar SoCs. The latter is needed because using OSTM on RZ/G2L requires manipulation of its reset signal, which is not yet available at the time of early probe, causing early probe to fail with -EPROBE_DEFER. It is only enabled when building a kernel with support for the RZ/G2L family, so it does not impact RZ/A1 and RZ/A2. Hence only one probe method can complete on all affected systems. As relying on the order of initialization of subsystems inside the kernel is fragile, set the DT node's OF_POPULATED flag after a successful early probe. This makes sure the platform driver's probe is never called after a successful early probe. Signed-off-by: Geert Uytterhoeven Reviewed-by: Lad Prabhakar Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/bd027379713cbaafa21ffe9e848ebb7f475ca0e7.1710930542.git.geert+renesas@glider.be --- drivers/clocksource/renesas-ostm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/renesas-ostm.c b/drivers/clocksource/renesas-ostm.c index 39487d05a009..3fcbd02b2483 100644 --- a/drivers/clocksource/renesas-ostm.c +++ b/drivers/clocksource/renesas-ostm.c @@ -210,6 +210,7 @@ static int __init ostm_init(struct device_node *np) pr_info("%pOF: used for clock events\n", np); } + of_node_set_flag(np, OF_POPULATED); return 0; err_cleanup: From e6f8bed209d5fa8602cda45930b0a331234d95ed Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 30 Apr 2024 23:42:39 +0200 Subject: [PATCH 32/33] clocksource/drivers/timer-ti-dm: Remove an unused field in struct dmtimer In "struct dmtimer", the 'rate' field is unused. Remove it. Found with cppcheck, unusedStructMember.
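Looking back at the of_node_set_flag(np, OF_POPULATED) one-liner in the renesas-ostm patch above, a hypothetical, condensed early-init path shows where that flag belongs and why it prevents a second probe; example_early_setup() is an invented stand-in for the real register, clock and reset setup:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/of.h>

/* Invented helper standing in for the real hardware setup. */
static int __init example_early_setup(struct device_node *np)
{
	return of_device_is_available(np) ? 0 : -ENODEV;
}

static int __init example_early_init(struct device_node *np)
{
	int ret = example_early_setup(np);

	if (ret)
		return ret;	/* node stays unpopulated; the platform driver may still probe it later */

	/*
	 * Mark the node as populated so the OF platform code does not create
	 * a platform device for it, ensuring the platform driver's probe is
	 * never called once this early path has succeeded.
	 */
	of_node_set_flag(np, OF_POPULATED);
	return 0;
}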
Signed-off-by: Christophe JAILLET Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/c9f7579922c587fce334a1aa9651f3189de7a00b.1714513336.git.christophe.jaillet@wanadoo.fr --- drivers/clocksource/timer-ti-dm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c index 56acf2617262..b7a34b1a975e 100644 --- a/drivers/clocksource/timer-ti-dm.c +++ b/drivers/clocksource/timer-ti-dm.c @@ -129,7 +129,6 @@ struct dmtimer { void __iomem *func_base; /* function register base */ atomic_t enabled; - unsigned long rate; unsigned reserved:1; unsigned posted:1; unsigned omap1:1; From 2030a7e11f161b4067bd4eadd984cdb36446fcca Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 2 May 2024 16:34:46 -0700 Subject: [PATCH 33/33] clocksource/drivers/arm_arch_timer: Mark hisi_161010101_oem_info const This isn't modified at runtime. Mark it const so it can move to read-only data. Cc: dann frazier Cc: Hanjun Guo Cc: Marc Zyngier Cc: Mark Rutland Signed-off-by: Stephen Boyd Reviewed-by: Hanjun Guo Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20240502233447.420888-1-swboyd@chromium.org --- drivers/clocksource/arm_arch_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 8d4a52056684..5bb43cc1a8df 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -331,7 +331,7 @@ static u64 notrace hisi_161010101_read_cntvct_el0(void) return __hisi_161010101_read_reg(cntvct_el0); } -static struct ate_acpi_oem_info hisi_161010101_oem_info[] = { +static const struct ate_acpi_oem_info hisi_161010101_oem_info[] = { /* * Note that trailing spaces are required to properly match * the OEM table information.