From 2cce9059599143aa950b0baaf2523b17ab47d27d Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Tue, 14 May 2024 12:24:18 +0800 Subject: [PATCH 01/11] LoongArch: Select ARCH_HAS_FAST_MULTIPLIER LA464 and LA664 can do 32-bit/64-bit integer multiplication with a latency of 4 cycles and a throughput of 2 ops per cycle. It is comparable to the mainstream x86 and arm64 cores, so we can select ARCH_HAS_FAST_MULTIPLIER like them. It speeds up __sw_hweight32() in lib/hweight.c by about 14% on LA464 and 11% on LA664, and __sw_hweight64() by about 30% on LA464 and 33% on LA664. Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 54ad04dacdee..1355ec0c69cc 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -16,6 +16,7 @@ config LOONGARCH select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CURRENT_STACK_POINTER + select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS From 5125d033c8af733ee4d52e3e3c6ebf5784976e46 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Tue, 14 May 2024 12:24:18 +0800 Subject: [PATCH 02/11] LoongArch: Select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 This allows compiling a full 128-bit product of two 64-bit integers as a mul/mulh pair, instead of a nasty long sequence of 20+ instructions. However, after selecting ARCH_SUPPORTS_INT128, when optimizing for size the compiler generates calls to __ashlti3, __ashrti3, and __lshrti3 for shifting __int128 values, causing a link failure: loongarch64-unknown-linux-gnu-ld: kernel/sched/fair.o: in function `mul_u64_u32_shr': /include/linux/math64.h:161:(.text+0x5e4): undefined reference to `__lshrti3' So provide the implementation of these functions if ARCH_SUPPORTS_INT128. 
Closes: https://lore.kernel.org/loongarch/CAAhV-H5EZ=7OF7CSiYyZ8_+wWuenpo=K2WT8-6mAT4CvzUC_4g@mail.gmail.com/ Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/asm-prototypes.h | 6 +++ arch/loongarch/lib/Makefile | 2 + arch/loongarch/lib/tishift.S | 56 +++++++++++++++++++++ 4 files changed, 65 insertions(+) create mode 100644 arch/loongarch/lib/tishift.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 1355ec0c69cc..335a98b28167 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -57,6 +57,7 @@ config LOONGARCH select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_HUGETLBFS + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN select ARCH_SUPPORTS_NUMA_BALANCING diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h index cf8e1a4e7c19..51f224bcfc65 100644 --- a/arch/loongarch/include/asm/asm-prototypes.h +++ b/arch/loongarch/include/asm/asm-prototypes.h @@ -6,3 +6,9 @@ #include #include #include + +#ifdef CONFIG_ARCH_SUPPORTS_INT128 +__int128_t __ashlti3(__int128_t a, int b); +__int128_t __ashrti3(__int128_t a, int b); +__int128_t __lshrti3(__int128_t a, int b); +#endif diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index a77bf160bfc4..ccea3bbd4353 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -6,6 +6,8 @@ lib-y += delay.o memset.o memcpy.o memmove.o \ clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o +obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o + obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/loongarch/lib/tishift.S b/arch/loongarch/lib/tishift.S new file mode 100644 index 000000000000..fa1d310012bc --- /dev/null +++ b/arch/loongarch/lib/tishift.S @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: 
GPL-2.0-only */ + +#include +#include +#include + +SYM_FUNC_START(__ashlti3) + srli.d t2, a0, 1 + nor t3, zero, a2 + sll.d t1, a1, a2 + srl.d t2, t2, t3 + andi t0, a2, 64 + sll.d a0, a0, a2 + or t1, t2, t1 + maskeqz a1, a0, t0 + masknez a0, a0, t0 + masknez t0, t1, t0 + or a1, t0, a1 + jr ra +SYM_FUNC_END(__ashlti3) +EXPORT_SYMBOL(__ashlti3) + +SYM_FUNC_START(__ashrti3) + nor t3, zero, a2 + slli.d t2, a1, 1 + srl.d t1, a0, a2 + sll.d t2, t2, t3 + andi t0, a2, 64 + or t1, t2, t1 + sra.d a2, a1, a2 + srai.d a1, a1, 63 + maskeqz a0, a2, t0 + maskeqz a1, a1, t0 + masknez a2, a2, t0 + masknez t0, t1, t0 + or a1, a1, a2 + or a0, t0, a0 + jr ra +SYM_FUNC_END(__ashrti3) +EXPORT_SYMBOL(__ashrti3) + +SYM_FUNC_START(__lshrti3) + slli.d t2, a1, 1 + nor t3, zero, a2 + srl.d t1, a0, a2 + sll.d t2, t2, t3 + andi t0, a2, 64 + srl.d a1, a1, a2 + or t1, t2, t1 + maskeqz a0, a1, t0 + masknez a1, a1, t0 + masknez t0, t1, t0 + or a0, t0, a0 + jr ra +SYM_FUNC_END(__lshrti3) +EXPORT_SYMBOL(__lshrti3) From d0b35b024725fda19c44f7144dd35d13c7e920ba Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 14 May 2024 12:24:18 +0800 Subject: [PATCH 03/11] LoongArch: Select ARCH_WANT_DEFAULT_BPF_JIT BPF JIT has better performance and is more secure than the BPF interpreter, so enable it by default, as most other architectures do. 
Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 335a98b28167..24bce573dc2b 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -65,6 +65,7 @@ config LOONGARCH select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP From ff4a2443ef88874943f6d8444fb86327ec6f96b0 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 14 May 2024 12:24:18 +0800 Subject: [PATCH 04/11] LoongArch: Select THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE THP_SWAP has been proven to improve the swap throughput significantly on x86_64 systems according to commit bd4c82c22c367e0 ("mm, THP, swap: delay splitting THP after swapped out"), on ARM64 systems according to commit d0637c505f8a1d ("arm64: enable THP_SWAP for arm64") and on RISC-V systems according to commit 87f81e66e2e84c7 ("riscv: enable THP_SWAP for RV64"). Enable THP_SWAP for LoongArch. Testing with the micro-benchmark introduced by commit d0637c505f8a1d ("arm64: enable THP_SWAP for arm64") shows the following numbers on the Loongson-3A5000 board: swp out bandwidth w/o patch: 1815716 bytes/ms (mean of 10 tests) swp out bandwidth w/ patch: 3410003 bytes/ms (mean of 10 tests) Bandwidth improved by about 87.8% (i.e. the swap-out time per byte is reduced by 46.75%)! 
Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 24bce573dc2b..e22330064e98 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -70,6 +70,7 @@ config LOONGARCH select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select ARCH_WANTS_NO_INSTR + select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE select BUILDTIME_TABLE_SORT select COMMON_CLK select CPU_PM From 5685d7fcb55fd729d7e0452c157a0ac8d72ca7b6 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 14 May 2024 12:24:18 +0800 Subject: [PATCH 05/11] LoongArch: Give a chance to build with !CONFIG_SMP In the current code, SMP is selected in Kconfig for LoongArch, so users cannot unset it. This is reasonable for a multi-processor machine. But as the help info of config SMP says, if you have a system with only one CPU, say N. On a uni-processor machine, the kernel will run faster if you say N here. Loongson-2K0500 is a single-core CPU for applications like industrial control, printing terminals, and BMC (Baseboard Management Controller). There are many development boards, products and solutions on the market, so it is better and necessary to give a chance to build with !CONFIG_SMP for a uni-processor machine. First of all, do not select SMP for config LOONGARCH in Kconfig to make it possible to unset CONFIG_SMP. Then, make some changes to fix warnings and errors if CONFIG_SMP is not set. 
(1) Define get_ipi_irq() only if CONFIG_SMP is set to fix the warning: arch/loongarch/kernel/irq.c:90:19: warning: 'get_ipi_irq' defined but not used [-Wunused-function] (2) Add "#ifdef CONFIG_SMP" in asm/smp.h to fix the warning: ./arch/loongarch/include/asm/smp.h:49:9: warning: "raw_smp_processor_id" redefined 49 | #define raw_smp_processor_id raw_smp_processor_id | ^~~~~~~~~~~~~~~~~~~~ ./include/linux/smp.h:198:9: note: this is the location of the previous definition 198 | #define raw_smp_processor_id() 0 (3) Define machine_shutdown() as empty under !CONFIG_SMP to fix the error: arch/loongarch/kernel/machine_kexec.c: In function 'machine_shutdown': arch/loongarch/kernel/machine_kexec.c:233:25: error: implicit declaration of function 'cpu_device_up'; did you mean 'put_device'? [-Wimplicit-function-declaration] (4) Make config SCHED_SMT depends on SMP to fix many errors such as: kernel/sched/core.c: In function 'sched_core_find': kernel/sched/core.c:310:43: error: 'struct rq' has no member named 'cpu' (5) Define cpu_logical_map(cpu) as 0 under !CONFIG_SMP in asm/smp.h, then include asm/smp.h in asm/acpi.h (because acpi.h is included in linux/irq.h indirectly) to fix many build errors under drivers/irqchip such as: drivers/irqchip/irq-loongson-eiointc.c: In function 'cpu_to_eio_node': drivers/irqchip/irq-loongson-eiointc.c:59:16: error: implicit declaration of function 'cpu_logical_map' [-Wimplicit-function-declaration] (6) Do not write per_cpu_offset(0) to PERCPU_BASE_KS when resume because the per_cpu_offset(x) macro is defined as (__per_cpu_offset[x]) only under CONFIG_SMP in include/asm-generic/percpu.h. Just save the value of PERCPU_BASE_KS when suspend and restore it when resume to fix the error: arch/loongarch/power/suspend.c: In function 'loongarch_common_resume': arch/loongarch/power/suspend.c:47:21: error: implicit declaration of function 'per_cpu_offset' [-Wimplicit-function-declaration] (7) Fix huge page handling under !CONFIG_SMP in tlbex.S. 
When running the UnixBench tests with the "-c 1" single-streamed pass, the performance improvement is about 9 percent with this patch. By the way, it is also helpful to debug and analyze kernel issues of multi-processor systems under !CONFIG_SMP. Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 +- arch/loongarch/include/asm/acpi.h | 1 + arch/loongarch/include/asm/smp.h | 6 ++++++ arch/loongarch/kernel/irq.c | 2 ++ arch/loongarch/kernel/machine_kexec.c | 2 +- arch/loongarch/mm/tlbex.S | 9 ++++++--- arch/loongarch/power/suspend.c | 4 +++- 7 files changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e22330064e98..73246b21aae8 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -178,7 +178,6 @@ config LOONGARCH select PCI_QUIRKS select PERF_USE_VMALLOC select RTC_LIB - select SMP select SPARSE_IRQ select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_ARCH_UNALIGN_NO_WARN @@ -424,6 +423,7 @@ config EFI_STUB config SCHED_SMT bool "SMT scheduler support" + depends on SMP default y help Improves scheduler's performance when there are multiple diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 49e29b29996f..313f66f7913a 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -8,6 +8,7 @@ #ifndef _ASM_LOONGARCH_ACPI_H #define _ASM_LOONGARCH_ACPI_H +#include #include #ifdef CONFIG_ACPI diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index f81e5f01d619..b3a0868d29ea 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -6,6 +6,8 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H +#ifdef CONFIG_SMP + #include #include #include @@ -101,4 +103,8 @@ static inline void __cpu_die(unsigned int cpu) } #endif +#else /* !CONFIG_SMP */ +#define cpu_logical_map(cpu) 0 +#endif /* CONFIG_SMP */ + #endif /* __ASM_SMP_H */ diff --git a/arch/loongarch/kernel/irq.c 
b/arch/loongarch/kernel/irq.c index 883e5066ae44..e791fa275ec5 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -87,6 +87,7 @@ static void __init init_vec_parent_group(void) acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse); } +#ifdef CONFIG_SMP static int __init get_ipi_irq(void) { struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); @@ -96,6 +97,7 @@ static int __init get_ipi_irq(void) return -EINVAL; } +#endif void __init init_IRQ(void) { diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index 2dcb9e003657..8ae641dc53bb 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -225,6 +225,7 @@ void crash_smp_send_stop(void) void machine_shutdown(void) { +#ifdef CONFIG_SMP int cpu; /* All CPUs go to reboot_code_buffer */ @@ -232,7 +233,6 @@ void machine_shutdown(void) if (!cpu_online(cpu)) cpu_device_up(get_cpu_device(cpu)); -#ifdef CONFIG_SMP smp_call_function(kexec_shutdown_secondary, NULL, 0); #endif } diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index a44387b838af..c08682a89c58 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -125,6 +125,8 @@ vmalloc_load: tlb_huge_update_load: #ifdef CONFIG_SMP ll.d ra, t1, 0 +#else + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) #endif andi t0, ra, _PAGE_PRESENT beqz t0, nopage_tlb_load @@ -135,7 +137,6 @@ tlb_huge_update_load: beqz t0, tlb_huge_update_load ori t0, ra, _PAGE_VALID #else - rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) ori t0, ra, _PAGE_VALID st.d t0, t1, 0 #endif @@ -281,6 +282,8 @@ vmalloc_store: tlb_huge_update_store: #ifdef CONFIG_SMP ll.d ra, t1, 0 +#else + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) #endif andi t0, ra, _PAGE_PRESENT | _PAGE_WRITE xori t0, t0, _PAGE_PRESENT | _PAGE_WRITE @@ -292,7 +295,6 @@ tlb_huge_update_store: beqz t0, tlb_huge_update_store ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) #else 
- rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif @@ -438,6 +440,8 @@ vmalloc_modify: tlb_huge_update_modify: #ifdef CONFIG_SMP ll.d ra, t1, 0 +#else + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) #endif andi t0, ra, _PAGE_WRITE beqz t0, nopage_tlb_modify @@ -448,7 +452,6 @@ tlb_huge_update_modify: beqz t0, tlb_huge_update_modify ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) #else - rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif diff --git a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c index 166d9e06a64b..c9e594925c47 100644 --- a/arch/loongarch/power/suspend.c +++ b/arch/loongarch/power/suspend.c @@ -24,6 +24,7 @@ struct saved_registers { u64 kpgd; u32 pwctl0; u32 pwctl1; + u64 pcpu_base; }; static struct saved_registers saved_regs; @@ -36,6 +37,7 @@ void loongarch_common_suspend(void) saved_regs.pwctl1 = csr_read32(LOONGARCH_CSR_PWCTL1); saved_regs.ecfg = csr_read32(LOONGARCH_CSR_ECFG); saved_regs.euen = csr_read32(LOONGARCH_CSR_EUEN); + saved_regs.pcpu_base = csr_read64(PERCPU_BASE_KS); loongarch_suspend_addr = loongson_sysconf.suspend_addr; } @@ -44,7 +46,6 @@ void loongarch_common_resume(void) { sync_counter(); local_flush_tlb_all(); - csr_write64(per_cpu_offset(0), PERCPU_BASE_KS); csr_write64(eentry, LOONGARCH_CSR_EENTRY); csr_write64(eentry, LOONGARCH_CSR_MERRENTRY); csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY); @@ -55,6 +56,7 @@ void loongarch_common_resume(void) csr_write32(saved_regs.pwctl1, LOONGARCH_CSR_PWCTL1); csr_write32(saved_regs.ecfg, LOONGARCH_CSR_ECFG); csr_write32(saved_regs.euen, LOONGARCH_CSR_EUEN); + csr_write64(saved_regs.pcpu_base, PERCPU_BASE_KS); } int loongarch_acpi_suspend(void) From d6af2c76399f98444a5b4de96baf4b362d9f102b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 14 May 2024 12:24:18 +0800 Subject: [PATCH 06/11] LoongArch: Fix callchain 
parse error with kernel tracepoint events again With commit d3119bc985fb645 ("LoongArch: Fix callchain parse error with kernel tracepoint events"), perf can parse the kernel callchain, but the result is incomplete and sometimes wrong. The reason is that LoongArch's unwinders (guess, prologue and orc) don't really need fp (i.e., regs[22]), and they use sp (i.e., regs[3]) as the frame address rather than the current stack pointer. Fix that by removing the assignment of regs[22], and instead assigning __builtin_frame_address(0) to regs[3]. Without this fix: Children Self Command Shared Object Symbol ........ ........ ............. ................. ................ 33.91% 33.91% swapper [kernel.vmlinux] [k] __schedule | |--33.04%--__schedule | --0.87%--__arch_cpu_idle __schedule With this fix: Children Self Command Shared Object Symbol ........ ........ ............. ................. ................ 31.16% 31.16% swapper [kernel.vmlinux] [k] __schedule | |--20.63%--smpboot_entry | cpu_startup_entry | schedule_idle | __schedule | --10.53%--start_kernel cpu_startup_entry schedule_idle __schedule Fixes: d3119bc985fb645 ("LoongArch: Fix callchain parse error with kernel tracepoint events") Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/perf_event.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h index 52b638059e40..f948a0676daf 100644 --- a/arch/loongarch/include/asm/perf_event.h +++ b/arch/loongarch/include/asm/perf_event.h @@ -13,8 +13,7 @@ #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->csr_era = (__ip); \ - (regs)->regs[3] = current_stack_pointer; \ - (regs)->regs[22] = (unsigned long) __builtin_frame_address(0); \ + (regs)->regs[3] = (unsigned long) __builtin_frame_address(0); \ } #endif /* __LOONGARCH_PERF_EVENT_H__ */ From 8f8d74ee110c02137f5b78ca0a2bd6c10331f267 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 14 May 2024 12:24:18 +0800 Subject: [PATCH 
07/11] LoongArch: rust: Switch to use built-in rustc target This commit switches to use the LoongArch's built-in rustc target 'loongarch64-unknown-none-softfloat'. The Rust samples have been tested. Acked-by: Miguel Ojeda Tested-by: Miguel Ojeda Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 +- rust/Makefile | 2 +- scripts/Makefile | 2 +- scripts/generate_rust_target.rs | 7 +------ 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index df6caf79537a..4347915721bd 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -101,7 +101,7 @@ ifdef CONFIG_OBJTOOL KBUILD_CFLAGS += -fno-jump-tables endif -KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json +KBUILD_RUSTFLAGS += --target=loongarch64-unknown-none-softfloat KBUILD_RUSTFLAGS_MODULE += -Crelocation-model=pic ifeq ($(CONFIG_RELOCATABLE),y) diff --git a/rust/Makefile b/rust/Makefile index 86a125c4243c..a1eab64140ad 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -436,7 +436,7 @@ $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--re $(obj)/core.o: private rustc_target_flags = $(core-cfgs) $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs FORCE +$(call if_changed_dep,rustc_library) -ifneq ($(or $(CONFIG_X86_64),$(CONFIG_LOONGARCH)),) +ifdef CONFIG_X86_64 $(obj)/core.o: scripts/target.json endif diff --git a/scripts/Makefile b/scripts/Makefile index bc90520a5426..fe56eeef09dd 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -12,7 +12,7 @@ hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_builder hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_gen -ifneq ($(or $(CONFIG_X86_64),$(CONFIG_LOONGARCH)),) +ifdef CONFIG_X86_64 always-$(CONFIG_RUST) += target.json filechk_rust_target = $< < include/config/auto.conf diff --git a/scripts/generate_rust_target.rs 
b/scripts/generate_rust_target.rs index 54919cf48621..acd3b1acef83 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -164,12 +164,7 @@ fn main() { ts.push("llvm-target", "x86_64-linux-gnu"); ts.push("target-pointer-width", "64"); } else if cfg.has("LOONGARCH") { - ts.push("arch", "loongarch64"); - ts.push("data-layout", "e-m:e-p:64:64-i64:64-i128:128-n64-S128"); - ts.push("features", "-f,-d"); - ts.push("llvm-target", "loongarch64-linux-gnusf"); - ts.push("llvm-abiname", "lp64s"); - ts.push("target-pointer-width", "64"); + panic!("loongarch uses the builtin rustc loongarch64-unknown-none-softfloat target"); } else { panic!("Unsupported architecture"); } From 3e4d599c1c26afcda5b7be7fa2c04946eb166d4a Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Tue, 14 May 2024 12:24:18 +0800 Subject: [PATCH 08/11] LoongArch: dts: Remove "disabled" state of clock controller node No one would disable things like clock controllers or architectural interrupt controllers, because otherwise they would have no usable system. So we just enable them by default. 
Suggested-by: Conor Dooley Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- arch/loongarch/boot/dts/loongson-2k1000-ref.dts | 4 ---- arch/loongarch/boot/dts/loongson-2k1000.dtsi | 1 - 2 files changed, 5 deletions(-) diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts index ed4d32434041..8463fe035386 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts @@ -113,10 +113,6 @@ status = "okay"; }; -&clk { - status = "okay"; -}; - &rtc0 { status = "okay"; }; diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi index b6aeb1f70e2a..92180140eb56 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi @@ -159,7 +159,6 @@ #clock-cells = <1>; clocks = <&ref_100m>; clock-names = "ref_100m"; - status = "disabled"; }; gpio0: gpio@1fe00500 { From bd7bc02b0cfe141c077187244ddd2022102618fe Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Tue, 14 May 2024 12:24:18 +0800 Subject: [PATCH 09/11] LoongArch: dts: Add new supported device nodes to Loongson-2K0500 By now, more Loongson-2K0500 related drivers are supported, such as clock controller, thermal controller, and dma controller. So we add these device nodes to the Loongson-2K0500 dts file. 
Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- arch/loongarch/boot/dts/loongson-2k0500.dtsi | 86 +++++++++++++++++++- 1 file changed, 83 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/boot/dts/loongson-2k0500.dtsi b/arch/loongarch/boot/dts/loongson-2k0500.dtsi index 444779c21034..3b38ff8853a7 100644 --- a/arch/loongarch/boot/dts/loongson-2k0500.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k0500.dtsi @@ -6,6 +6,7 @@ /dts-v1/; #include +#include / { #address-cells = <2>; @@ -19,14 +20,15 @@ compatible = "loongson,la264"; device_type = "cpu"; reg = <0x0>; - clocks = <&cpu_clk>; + clocks = <&clk LOONGSON2_NODE_CLK>; }; }; - cpu_clk: cpu-clk { + ref_100m: clock-ref-100m { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <500000000>; + clock-frequency = <100000000>; + clock-output-names = "ref_100m"; }; cpuintc: interrupt-controller { @@ -35,6 +37,28 @@ interrupt-controller; }; + thermal-zones { + cpu-thermal { + polling-delay-passive = <1000>; + polling-delay = <5000>; + thermal-sensors = <&tsensor 0>; + + trips { + cpu-alert { + temperature = <33000>; + hysteresis = <2000>; + type = "active"; + }; + + cpu-crit { + temperature = <85000>; + hysteresis = <5000>; + type = "critical"; + }; + }; + }; + }; + bus@10000000 { compatible = "simple-bus"; ranges = <0x0 0x10000000 0x0 0x10000000 0x0 0x10000000>, @@ -52,6 +76,54 @@ ranges = <1 0x0 0x0 0x16400000 0x4000>; }; + clk: clock-controller@1fe10400 { + compatible = "loongson,ls2k0500-clk"; + reg = <0x0 0x1fe10400 0x0 0x2c>; + #clock-cells = <1>; + clocks = <&ref_100m>; + clock-names = "ref_100m"; + }; + + dma-controller@1fe10c00 { + compatible = "loongson,ls2k0500-apbdma", "loongson,ls2k1000-apbdma"; + reg = <0 0x1fe10c00 0 0x8>; + interrupt-parent = <&eiointc>; + interrupts = <67>; + clocks = <&clk LOONGSON2_APB_CLK>; + #dma-cells = <1>; + status = "disabled"; + }; + + dma-controller@1fe10c10 { + compatible = "loongson,ls2k0500-apbdma", "loongson,ls2k1000-apbdma"; + reg = <0 
0x1fe10c10 0 0x8>; + interrupt-parent = <&eiointc>; + interrupts = <68>; + clocks = <&clk LOONGSON2_APB_CLK>; + #dma-cells = <1>; + status = "disabled"; + }; + + dma-controller@1fe10c20 { + compatible = "loongson,ls2k0500-apbdma", "loongson,ls2k1000-apbdma"; + reg = <0 0x1fe10c20 0 0x8>; + interrupt-parent = <&eiointc>; + interrupts = <69>; + clocks = <&clk LOONGSON2_APB_CLK>; + #dma-cells = <1>; + status = "disabled"; + }; + + dma-controller@1fe10c30 { + compatible = "loongson,ls2k0500-apbdma", "loongson,ls2k1000-apbdma"; + reg = <0 0x1fe10c30 0 0x8>; + interrupt-parent = <&eiointc>; + interrupts = <70>; + clocks = <&clk LOONGSON2_APB_CLK>; + #dma-cells = <1>; + status = "disabled"; + }; + liointc0: interrupt-controller@1fe11400 { compatible = "loongson,liointc-2.0"; reg = <0x0 0x1fe11400 0x0 0x40>, @@ -139,6 +211,14 @@ status = "disabled"; }; + tsensor: thermal-sensor@1fe11500 { + compatible = "loongson,ls2k0500-thermal", "loongson,ls2k1000-thermal"; + reg = <0x0 0x1fe11500 0x0 0x30>; + interrupt-parent = <&liointc0>; + interrupts = <7 IRQ_TYPE_LEVEL_HIGH>; + #thermal-sensor-cells = <1>; + }; + uart0: serial@1ff40800 { compatible = "ns16550a"; reg = <0x0 0x1ff40800 0x0 0x10>; From 7c33c9111c8b5fecf4c7a40e31f69125c1b5a1b6 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Tue, 14 May 2024 12:24:24 +0800 Subject: [PATCH 10/11] LoongArch: dts: Add new supported device nodes to Loongson-2K2000 By now, more Loongson-2K2000 related drivers are supported, such as clock controller and thermal controller. So we add these device nodes to the Loongson-2K2000 dts file. 
Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- arch/loongarch/boot/dts/loongson-2k2000.dtsi | 49 ++++++++++++++++++-- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi index 9eab2d02cbe8..0953c5707825 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi @@ -6,6 +6,7 @@ /dts-v1/; #include +#include / { #address-cells = <2>; @@ -19,21 +20,22 @@ compatible = "loongson,la364"; device_type = "cpu"; reg = <0x0>; - clocks = <&cpu_clk>; + clocks = <&clk LOONGSON2_NODE_CLK>; }; cpu1: cpu@2 { compatible = "loongson,la364"; device_type = "cpu"; reg = <0x1>; - clocks = <&cpu_clk>; + clocks = <&clk LOONGSON2_NODE_CLK>; }; }; - cpu_clk: cpu-clk { + ref_100m: clock-ref-100m { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <1400000000>; + clock-frequency = <100000000>; + clock-output-names = "ref_100m"; }; cpuintc: interrupt-controller { @@ -42,6 +44,28 @@ interrupt-controller; }; + thermal-zones { + cpu-thermal { + polling-delay-passive = <1000>; + polling-delay = <5000>; + thermal-sensors = <&tsensor 0>; + + trips { + cpu-alert { + temperature = <40000>; + hysteresis = <2000>; + type = "active"; + }; + + cpu-crit { + temperature = <85000>; + hysteresis = <5000>; + type = "critical"; + }; + }; + }; + }; + bus@10000000 { compatible = "simple-bus"; ranges = <0x0 0x10000000 0x0 0x10000000 0x0 0x10000000>, @@ -58,6 +82,14 @@ ranges = <1 0x0 0x0 0x18400000 0x4000>; }; + clk: clock-controller@10010480 { + compatible = "loongson,ls2k2000-clk"; + reg = <0x0 0x10010480 0x0 0x100>; + #clock-cells = <1>; + clocks = <&ref_100m>; + clock-names = "ref_100m"; + }; + pmc: power-management@100d0000 { compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon"; reg = <0x0 0x100d0000 0x0 0x58>; @@ -80,6 +112,15 @@ }; }; + tsensor: thermal-sensor@1fe01460 { + compatible = 
"loongson,ls2k2000-thermal"; + reg = <0x0 0x1fe01460 0x0 0x30>, + <0x0 0x1fe0019c 0x0 0x4>; + interrupt-parent = <&liointc>; + interrupts = <7 IRQ_TYPE_LEVEL_HIGH>; + #thermal-sensor-cells = <1>; + }; + liointc: interrupt-controller@1fe01400 { compatible = "loongson,liointc-1.0"; reg = <0x0 0x1fe01400 0x0 0x64>; From 9cc1df421f00453afdcaf78b105d8e7fd03cce78 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 19 May 2024 22:18:56 +0800 Subject: [PATCH 11/11] LoongArch: Update Loongson-3 default config file 1, Enable PSI tracking. 2, Enable IKCONFIG/IKHEADERS. 3, Enable Generic PHY driver. 4, Enable Motorcomm PHY driver. 5, Enable ORC stack unwinder. 6, Enable some squashfs options. 7, Enable some netfilter options. Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index f18c2ba871ef..d76adbb1ce92 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -14,6 +14,10 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_PSI=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_IKHEADERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y @@ -130,13 +134,22 @@ CONFIG_IP_MROUTE=y CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y +CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m +CONFIG_INET_ESPINTCP=y +CONFIG_INET_IPCOMP=m CONFIG_INET_UDP_DIAG=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y +CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m +CONFIG_INET6_ESPINTCP=y +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_MROUTE=y CONFIG_MPTCP=y CONFIG_NETWORK_PHY_TIMESTAMPING=y @@ -152,6 +165,8 @@ CONFIG_NF_CONNTRACK_PPTP=m CONFIG_NF_CONNTRACK_TFTP=m 
CONFIG_NF_CT_NETLINK=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=y +CONFIG_NFT_CT=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -164,6 +179,7 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m +CONFIG_NFT_FIB_INET=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m CONFIG_NFT_TPROXY=m @@ -260,6 +276,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -280,6 +297,7 @@ CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES=m CONFIG_BRIDGE_EBT_BROUTE=m CONFIG_BRIDGE_EBT_T_FILTER=m @@ -550,6 +568,7 @@ CONFIG_NGBE=y CONFIG_TXGBE=y # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set +CONFIG_MOTORCOMM_PHY=y CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -811,6 +830,7 @@ CONFIG_NTB_SWITCHTEC=m CONFIG_NTB_PERF=m CONFIG_NTB_TRANSPORT=m CONFIG_PWM=y +CONFIG_GENERIC_PHY=y CONFIG_USB4=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y @@ -876,10 +896,13 @@ CONFIG_UBIFS_FS=m CONFIG_UBIFS_FS_ADVANCED_COMPR=y CONFIG_CRAMFS=m CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_FILE_DIRECT=y +CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y +CONFIG_SQUASHFS_ZSTD=y CONFIG_MINIX_FS=m CONFIG_ROMFS_FS=m CONFIG_PSTORE=m @@ -961,3 +984,4 @@ CONFIG_DEBUG_FS=y CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_PREEMPT is not set # CONFIG_FTRACE is not set +CONFIG_UNWINDER_ORC=y