From e18048da9bc3f87acef4eb67a11b4fc55fe15424 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 23 Feb 2023 14:46:05 -0800 Subject: [PATCH 1/4] RISC-V: Stop emitting attributes The RISC-V ELF attributes don't contain any useful information. New toolchains ignore them, but they frequently trip up various older/mixed toolchains. So just turn them off. Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20230223224605.6995-1-palmer@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 7 +++++++ arch/riscv/kernel/compat_vdso/Makefile | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 6203c3378922..4de83b9b1772 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -84,6 +84,13 @@ endif # Avoid generating .eh_frame sections. KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables +# The RISC-V attributes frequently cause compatibility issues and provide no +# information, so just turn them off. +KBUILD_CFLAGS += $(call cc-option,-mno-riscv-attribute) +KBUILD_AFLAGS += $(call cc-option,-mno-riscv-attribute) +KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mno-arch-attr) +KBUILD_AFLAGS += $(call as-option,-Wa$(comma)-mno-arch-attr) + KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 260daf3236d3..7f34f3c7c882 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -14,6 +14,10 @@ COMPAT_LD := $(LD) COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 COMPAT_LD_FLAGS := -melf32lriscv +# Disable attributes, as they're useless and break the build. +COMPAT_CC_FLAGS += $(call cc-option,-mno-riscv-attribute) +COMPAT_CC_FLAGS += $(call as-option,-Wa$(comma)-mno-arch-attr) + # Files to link into the compat_vdso obj-compat_vdso = $(patsubst %, %.o, $(compat_vdso-syms)) note.o From bf89b7ee52af5a5944fa3539e86089f72475055b Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 2 Mar 2023 17:41:55 +0000 Subject: [PATCH 2/4] RISC-V: fix taking the text_mutex twice during sifive errata patching Chris pointed out that some bonehead, *cough* me *cough*, added two mutex_locks() to the SiFive errata patching. The second was meant to have been a mutex_unlock(). This results in errors such as Unable to handle kernel NULL pointer dereference at virtual address 0000000000000030 Oops [#1] Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 6.2.0-rc1-starlight-00079-g9493e6f3ce02 #229 Hardware name: BeagleV Starlight Beta (DT) epc : __schedule+0x42/0x500 ra : schedule+0x46/0xce epc : ffffffff8065957c ra : ffffffff80659a80 sp : ffffffff81203c80 gp : ffffffff812d50a0 tp : ffffffff8120db40 t0 : ffffffff81203d68 t1 : 0000000000000001 t2 : 4c45203a76637369 s0 : ffffffff81203cf0 s1 : ffffffff8120db40 a0 : 0000000000000000 a1 : ffffffff81213958 a2 : ffffffff81213958 a3 : 0000000000000000 a4 : 0000000000000000 a5 : ffffffff80a1bd00 a6 : 0000000000000000 a7 : 0000000052464e43 s2 : ffffffff8120db41 s3 : ffffffff80a1ad00 s4 : 0000000000000000 s5 : 0000000000000002 s6 : ffffffff81213938 s7 : 0000000000000000 s8 : 0000000000000000 s9 : 0000000000000001 s10: ffffffff812d7204 s11: ffffffff80d3c920 t3 : 0000000000000001 t4 : ffffffff812e6dd7 t5 : ffffffff812e6dd8 t6 : ffffffff81203bb8 status: 0000000200000100 badaddr: 0000000000000030 cause: 000000000000000d [] schedule+0x46/0xce [] schedule_preempt_disabled+0x16/0x28 [] __mutex_lock.constprop.0+0x3fe/0x652 [] __mutex_lock_slowpath+0xe/0x16 [] mutex_lock+0x42/0x4c [] sifive_errata_patch_func+0xf6/0x18c [] _apply_alternatives+0x74/0x76 [] apply_boot_alternatives+0x3c/0xfa [] setup_arch+0x60c/0x640 [] start_kernel+0x8e/0x99c ---[ end trace 0000000000000000 ]--- Reported-by: Chris Hofstaedtler Fixes: 9493e6f3ce02 ("RISC-V: take text_mutex during alternative patching") Signed-off-by: Conor Dooley Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230302174154.970746-1-conor@kernel.org [Palmer: pick up Geert's bug report from the thread] Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/sifive/errata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index da55cb247e89..31d2ebea4286 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -111,7 +111,7 @@ void __init_or_module sifive_errata_patch_func(struct alt_entry *begin, mutex_lock(&text_mutex); patch_text_nosync(ALT_OLD_PTR(alt), ALT_ALT_PTR(alt), alt->alt_len); - mutex_lock(&text_mutex); + mutex_unlock(&text_mutex); cpu_apply_errata |= tmp; } } From 76950340cf03b149412fe0d5f0810e52ac1df8cb Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 8 Mar 2023 10:16:39 +0100 Subject: [PATCH 3/4] riscv: Use READ_ONCE_NOCHECK in imprecise unwinding stack mode When CONFIG_FRAME_POINTER is unset, the stack unwinding function walk_stackframe randomly reads the stack and then, when KASAN is enabled, it can lead to the following backtrace: [ 0.000000] ================================================================== [ 0.000000] BUG: KASAN: stack-out-of-bounds in walk_stackframe+0xa6/0x11a [ 0.000000] Read of size 8 at addr ffffffff81807c40 by task swapper/0 [ 0.000000] [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.2.0-12919-g24203e6db61f #43 [ 0.000000] Hardware name: riscv-virtio,qemu (DT) [ 0.000000] Call Trace: [ 0.000000] [] walk_stackframe+0x0/0x11a [ 0.000000] [] init_param_lock+0x26/0x2a [ 0.000000] [] walk_stackframe+0xa2/0x11a [ 0.000000] [] dump_stack_lvl+0x22/0x36 [ 0.000000] [] print_report+0x198/0x4a8 [ 0.000000] [] init_param_lock+0x26/0x2a [ 0.000000] [] walk_stackframe+0xa2/0x11a [ 0.000000] [] kasan_report+0x9a/0xc8 [ 0.000000] [] walk_stackframe+0xa2/0x11a [ 0.000000] [] walk_stackframe+0xa2/0x11a [ 0.000000] [] desc_make_final+0x80/0x84 [ 0.000000] [] stack_trace_save+0x88/0xa6 [ 0.000000] [] filter_irq_stacks+0x72/0x76 [ 0.000000] [] devkmsg_read+0x32a/0x32e [ 0.000000] [] kasan_save_stack+0x28/0x52 [ 0.000000] [] desc_make_final+0x7c/0x84 [ 0.000000] [] stack_trace_save+0x84/0xa6 [ 0.000000] [] kasan_set_track+0x12/0x20 [ 0.000000] [] __kasan_slab_alloc+0x58/0x5e [ 0.000000] [] __kmem_cache_create+0x21e/0x39a [ 0.000000] [] create_boot_cache+0x70/0x9c [ 0.000000] [] kmem_cache_init+0x6c/0x11e [ 0.000000] [] mm_init+0xd8/0xfe [ 0.000000] [] start_kernel+0x190/0x3ca [ 0.000000] [ 0.000000] The buggy address belongs to stack of task swapper/0 [ 0.000000] and is located at offset 0 in frame: [ 0.000000] stack_trace_save+0x0/0xa6 [ 0.000000] [ 0.000000] This frame has 1 object: [ 0.000000] [32, 56) 'c' [ 0.000000] [ 0.000000] The buggy address belongs to the physical page: [ 0.000000] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x81a07 [ 0.000000] flags: 0x1000(reserved|zone=0) [ 0.000000] raw: 0000000000001000 ff600003f1e3d150 ff600003f1e3d150 0000000000000000 [ 0.000000] raw: 0000000000000000 0000000000000000 00000001ffffffff [ 0.000000] page dumped because: kasan: bad access detected [ 0.000000] [ 0.000000] Memory state around the buggy address: [ 0.000000] ffffffff81807b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] ffffffff81807b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] >ffffffff81807c00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 f3 [ 0.000000] ^ [ 0.000000] ffffffff81807c80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] ffffffff81807d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] ================================================================== Fix that by using READ_ONCE_NOCHECK when reading the stack in imprecise mode. Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly") Reported-by: Chathura Rajapaksha Link: https://lore.kernel.org/all/CAD7mqryDQCYyJ1gAmtMm8SASMWAQ4i103ptTb0f6Oda=tPY2=A@mail.gmail.com/ Suggested-by: Dmitry Vyukov Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230308091639.602024-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index f9a5a7c90ff0..64a9c093aef9 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -101,7 +101,7 @@ void notrace walk_stackframe(struct task_struct *task, while (!kstack_end(ksp)) { if (__kernel_text_address(pc) && unlikely(!fn(arg, pc))) break; - pc = (*ksp++) - 0x4; + pc = READ_ONCE_NOCHECK(*ksp++) - 0x4; } } From 2a8db5ec4a28a0fce822d10224db9471a44b6925 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 3 Mar 2023 14:37:55 +0000 Subject: [PATCH 4/4] RISC-V: Don't check text_mutex during stop_machine We're currently using stop_machine() to update ftrace & kprobes, which means that the thread that takes text_mutex during may not be the same as the thread that eventually patches the code. This isn't actually a race because the lock is still held (preventing any other concurrent accesses) and there is only one thread running during stop_machine(), but it does trigger a lockdep failure. This patch just elides the lockdep check during stop_machine. Fixes: c15ac4fd60d5 ("riscv/ftrace: Add dynamic function tracer support") Suggested-by: Steven Rostedt Reported-by: Changbin Du Signed-off-by: Palmer Dabbelt Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20230303143754.4005217-1-conor.dooley@microchip.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ftrace.h | 2 +- arch/riscv/include/asm/patch.h | 2 ++ arch/riscv/kernel/ftrace.c | 13 +++++++++++-- arch/riscv/kernel/patch.c | 28 +++++++++++++++++++++++++--- 4 files changed, 39 insertions(+), 6 deletions(-) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 9e73922e1e2e..d47d87c2d7e3 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -109,6 +109,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop #endif -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* _ASM_RISCV_FTRACE_H */ diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h index f433121774c0..63c98833d510 100644 --- a/arch/riscv/include/asm/patch.h +++ b/arch/riscv/include/asm/patch.h @@ -9,4 +9,6 @@ int patch_text_nosync(void *addr, const void *insns, size_t len); int patch_text(void *addr, u32 *insns, int ninsns); +extern int riscv_patch_in_stop_machine; + #endif /* _ASM_RISCV_PATCH_H */ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 5bff37af4770..03a6434a8cdd 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -15,10 +15,19 @@ void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) { mutex_lock(&text_mutex); + + /* + * The code sequences we use for ftrace can't be patched while the + * kernel is running, so we need to use stop_machine() to modify them + * for now. This doesn't play nice with text_mutex, we use this flag + * to elide the check. + */ + riscv_patch_in_stop_machine = true; } void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) { + riscv_patch_in_stop_machine = false; mutex_unlock(&text_mutex); } @@ -107,9 +116,9 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { int out; - ftrace_arch_code_modify_prepare(); + mutex_lock(&text_mutex); out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); - ftrace_arch_code_modify_post_process(); + mutex_unlock(&text_mutex); return out; } diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 8086d1a281cd..575e71d6c8ae 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -11,6 +11,7 @@ #include #include #include +#include #include struct patch_insn { @@ -20,6 +21,8 @@ struct patch_insn { atomic_t cpu_count; }; +int riscv_patch_in_stop_machine = false; + #ifdef CONFIG_MMU /* * The fix_to_virt(, idx) needs a const value (not a dynamic variable of @@ -60,8 +63,15 @@ static int patch_insn_write(void *addr, const void *insn, size_t len) * Before reaching here, it was expected to lock the text_mutex * already, so we don't need to give another lock here and could * ensure that it was safe between each cores. + * + * We're currently using stop_machine() for ftrace & kprobes, and while + * that ensures text_mutex is held before installing the mappings it + * does not ensure text_mutex is held by the calling thread. That's + * safe but triggers a lockdep failure, so just elide it for that + * specific case. */ - lockdep_assert_held(&text_mutex); + if (!riscv_patch_in_stop_machine) + lockdep_assert_held(&text_mutex); if (across_pages) patch_map(addr + len, FIX_TEXT_POKE1); @@ -125,6 +135,7 @@ NOKPROBE_SYMBOL(patch_text_cb); int patch_text(void *addr, u32 *insns, int ninsns) { + int ret; struct patch_insn patch = { .addr = addr, .insns = insns, @@ -132,7 +143,18 @@ int patch_text(void *addr, u32 *insns, int ninsns) .cpu_count = ATOMIC_INIT(0), }; - return stop_machine_cpuslocked(patch_text_cb, - &patch, cpu_online_mask); + /* + * kprobes takes text_mutex, before calling patch_text(), but as we call + * calls stop_machine(), the lockdep assertion in patch_insn_write() + * gets confused by the context in which the lock is taken. + * Instead, ensure the lock is held before calling stop_machine(), and + * set riscv_patch_in_stop_machine to skip the check in + * patch_insn_write(). + */ + lockdep_assert_held(&text_mutex); + riscv_patch_in_stop_machine = true; + ret = stop_machine_cpuslocked(patch_text_cb, &patch, cpu_online_mask); + riscv_patch_in_stop_machine = false; + return ret; } NOKPROBE_SYMBOL(patch_text);