From f9899c028151468d8c4af0bcbb3d5e87619b0973 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 26 Jan 2024 14:44:51 +0800 Subject: [PATCH] NUMA: early use of cpu_to_node() returns 0 instead of the correct node id During the kernel booting, the generic cpu_to_node() is called too early in arm64, powerpc and riscv when CONFIG_NUMA is enabled. There are at least four places in the common code where the generic cpu_to_node() is called before it is initialized: 1.) early_trace_init() in kernel/trace/trace.c 2.) sched_init() in kernel/sched/core.c 3.) init_sched_fair_class() in kernel/sched/fair.c 4.) workqueue_init_early() in kernel/workqueue.c This will harm performance since there is an increase in off node accesses. In order to fix the bug, the patch introduces early_numa_node_init() which is called after smp_prepare_boot_cpu() in start_kernel. early_numa_node_init will initialize the "numa_node" as soon as the early_cpu_to_node() is ready, before the cpu_to_node() is called at the first time. Link: https://lkml.kernel.org/r/20240126064451.5465-1-shijie@os.amperecomputing.com Signed-off-by: Huang Shijie Acked-by: Palmer Dabbelt [RISC-V] Cc: Albert Ou Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Greg Kroah-Hartman Cc: Huacai Chen Cc: Ingo Molnar Cc: Jakub Kicinski Cc: Jiaxun Yang Cc: Josh Poimboeuf Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Michael Kelley (LINUX) Cc: "Mike Rapoport (IBM)" Cc: Nick Desaulniers Cc: Paul Walmsley Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Valentin Schneider Cc: Vlastimil Babka Cc: Will Deacon Cc: Yury Norov Signed-off-by: Andrew Morton --- init/main.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/init/main.c b/init/main.c index 6b4b656cef1d..0aecd2839c1f 100644 --- a/init/main.c +++ b/init/main.c @@ -882,6 +882,19 @@ static void __init print_unknown_bootoptions(void) memblock_free(unknown_options, len); } +static void __init early_numa_node_init(void) +{ +#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID +#ifndef cpu_to_node + int cpu; + + /* The early_cpu_to_node() should be ready here. */ + for_each_possible_cpu(cpu) + set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); +#endif +#endif +} + asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector void start_kernel(void) { @@ -912,6 +925,7 @@ void start_kernel(void) setup_nr_cpu_ids(); setup_per_cpu_areas(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ + early_numa_node_init(); boot_cpu_hotplug_init(); pr_notice("Kernel command line: %s\n", saved_command_line);