/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Based on arch/x86/include/asm/arch_hweight.h
 */

#ifndef _ASM_RISCV_HWEIGHT_H
#define _ASM_RISCV_HWEIGHT_H

#include <asm/alternative-macros.h>
#include <asm/hwcap.h>
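
/*
 * Zbb's cpop instruction counts the set bits in a full register, and cpopw
 * counts only the low 32 bits on rv64.  __arch_hweight32() therefore needs
 * cpopw on rv64, while on rv32 a plain cpop already operates on a 32-bit
 * register.
 */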
#if (BITS_PER_LONG == 64)
#define CPOPW "cpopw "
#elif (BITS_PER_LONG == 32)
#define CPOPW "cpop "
#else
#error "Unexpected BITS_PER_LONG"
#endif
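
/*
 * Architecture hooks behind the generic hweight32()/hweight16()/hweight8()
 * population-count helpers; e.g. __arch_hweight32(0xf0f0f0f0) returns 16.
 */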
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
#ifdef CONFIG_RISCV_ISA_ZBB
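        /*
         * ALTERNATIVE() patches the jump to the 'legacy' label into a nop at
         * boot when the Zbb extension is detected, so execution falls through
         * to the cpop sequence below; otherwise the software fallback is used.
         */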
        asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
                             RISCV_ISA_EXT_ZBB, 1)
                 : : : : legacy);
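
        /*
         * The .option push/arch,+zbb/pop directives let the assembler accept
         * the cpop/cpopw instruction even when the kernel itself is not built
         * with Zbb enabled.
         */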
asm (".option push\n"
|
|
|
|
".option arch,+zbb\n"
|
|
|
|
CPOPW "%0, %0\n"
|
|
|
|
".option pop\n"
|
|
|
|
: "+r" (w) : :);
|
|
|
|
|
|
|
|
return w;
|
|
|
|
|
|
|
|
legacy:
#endif
        return __sw_hweight32(w);
}

static inline unsigned int __arch_hweight16(unsigned int w)
{
        return __arch_hweight32(w & 0xffff);
}

static inline unsigned int __arch_hweight8(unsigned int w)
{
        return __arch_hweight32(w & 0xff);
}

#if BITS_PER_LONG == 64
static __always_inline unsigned long __arch_hweight64(__u64 w)
{
# ifdef CONFIG_RISCV_ISA_ZBB
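        /*
         * Same runtime patching scheme as in __arch_hweight32(), but here a
         * full-width cpop counts all 64 bits at once.
         */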
        asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
                             RISCV_ISA_EXT_ZBB, 1)
                 : : : : legacy);

        asm (".option push\n"
             ".option arch,+zbb\n"
             "cpop %0, %0\n"
             ".option pop\n"
             : "+r" (w) : :);

        return w;

legacy:
# endif
        return __sw_hweight64(w);
}
#else /* BITS_PER_LONG == 64 */
static inline unsigned long __arch_hweight64(__u64 w)
{
        return __arch_hweight32((u32)w) +
               __arch_hweight32((u32)(w >> 32));
}
#endif /* !(BITS_PER_LONG == 64) */

#endif /* _ASM_RISCV_HWEIGHT_H */