sched: Add missing memory barrier in switch_mm_cid
Many architectures' switch_mm() (e.g. arm64) do not have an smp_mb() which the core scheduler code has depended upon since commit: commit223baf9d17
("sched: Fix performance regression introduced by mm_cid") If switch_mm() doesn't call smp_mb(), sched_mm_cid_remote_clear() can unset the actively used cid when it fails to observe active task after it sets lazy_put. There *is* a memory barrier between storing to rq->curr and _return to userspace_ (as required by membarrier), but the rseq mm_cid has stricter requirements: the barrier needs to be issued between store to rq->curr and switch_mm_cid(), which happens earlier than: - spin_unlock(), - switch_to(). So it's fine when the architecture switch_mm() happens to have that barrier already, but less so when the architecture only provides the full barrier in switch_to() or spin_unlock(). It is a bug in the rseq switch_mm_cid() implementation. All architectures that don't have memory barriers in switch_mm(), but rather have the full barrier either in finish_lock_switch() or switch_to() have them too late for the needs of switch_mm_cid(). Introduce a new smp_mb__after_switch_mm(), defined as smp_mb() in the generic barrier.h header, and use it in switch_mm_cid() for scheduler transitions where switch_mm() is expected to provide a memory barrier. Architectures can override smp_mb__after_switch_mm() if their switch_mm() implementation provides an implicit memory barrier. Override it with a no-op on x86 which implicitly provide this memory barrier by writing to CR3. Fixes:223baf9d17
("sched: Fix performance regression introduced by mm_cid") Reported-by: levi.yun <yeoreum.yun@arm.com> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> # for arm64 Acked-by: Dave Hansen <dave.hansen@linux.intel.com> # for x86 Cc: <stable@vger.kernel.org> # 6.4.x Cc: Linus Torvalds <torvalds@linux-foundation.org> Link: https://lore.kernel.org/r/20240415152114.59122-2-mathieu.desnoyers@efficios.com
This commit is contained in:
parent
0bbac3facb
commit
fe90f3967b
|
@ -79,6 +79,9 @@ do { \
|
|||
#define __smp_mb__before_atomic() do { } while (0)
|
||||
#define __smp_mb__after_atomic() do { } while (0)
|
||||
|
||||
/* Writing to CR3 provides a full memory barrier in switch_mm(). */
|
||||
#define smp_mb__after_switch_mm() do { } while (0)
|
||||
|
||||
#include <asm-generic/barrier.h>
|
||||
|
||||
#endif /* _ASM_X86_BARRIER_H */
|
||||
|
|
|
@ -294,5 +294,13 @@ do { \
|
|||
#define io_stop_wc() do { } while (0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Architectures that guarantee an implicit smp_mb() in switch_mm()
|
||||
* can override smp_mb__after_switch_mm.
|
||||
*/
|
||||
#ifndef smp_mb__after_switch_mm
|
||||
# define smp_mb__after_switch_mm() smp_mb()
|
||||
#endif
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
#endif /* __ASM_GENERIC_BARRIER_H */
|
||||
|
|
|
@ -79,6 +79,8 @@
|
|||
# include <asm/paravirt_api_clock.h>
|
||||
#endif
|
||||
|
||||
#include <asm/barrier.h>
|
||||
|
||||
#include "cpupri.h"
|
||||
#include "cpudeadline.h"
|
||||
|
||||
|
@ -3445,13 +3447,19 @@ static inline void switch_mm_cid(struct rq *rq,
|
|||
* between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
|
||||
* Provide it here.
|
||||
*/
|
||||
if (!prev->mm) // from kernel
|
||||
if (!prev->mm) { // from kernel
|
||||
smp_mb();
|
||||
/*
|
||||
* user -> user transition guarantees a memory barrier through
|
||||
* switch_mm() when current->mm changes. If current->mm is
|
||||
* unchanged, no barrier is needed.
|
||||
*/
|
||||
} else { // from user
|
||||
/*
|
||||
* user->user transition relies on an implicit
|
||||
* memory barrier in switch_mm() when
|
||||
* current->mm changes. If the architecture
|
||||
* switch_mm() does not have an implicit memory
|
||||
* barrier, it is emitted here. If current->mm
|
||||
* is unchanged, no barrier is needed.
|
||||
*/
|
||||
smp_mb__after_switch_mm();
|
||||
}
|
||||
}
|
||||
if (prev->mm_cid_active) {
|
||||
mm_cid_snapshot_time(rq, prev->mm);
|
||||
|
|
Loading…
Reference in New Issue