[libcpu/arm64] add C11 atomic ticket spinlock (#8882)

* [libcpu/arm64] add C11 atomic ticket spinlock

Replace the former flag-based spinlock implementation, which is unfair.

Besides, the C11 atomic implementation is more readable (it is plain C,
after all) and more maintainable, because the toolchain can apply its
built-in optimizations and tune the code for different
micro-architectures. For example, armv8.5 introduces a better
instruction, and the compiler can take advantage of it when it knows
the target platform supports it.
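
A condensed sketch of the ticket-lock idea in portable C11 (lock/unlock
only; it omits the WFE/SEV waiting used by the real cpu.c change below,
and the ticket_lock type and function names here are illustrative, not
the RT-Thread ones):

    #include <stdatomic.h>
    #include <stdint.h>

    struct ticket_lock
    {
        _Atomic(uint16_t) owner; /* ticket currently being served */
        _Atomic(uint16_t) next;  /* next ticket to hand out */
    };

    static void ticket_lock_acquire(struct ticket_lock *l)
    {
        /* atomically draw a ticket; waiters queue up in arrival order */
        uint16_t t = atomic_fetch_add_explicit(&l->next, 1, memory_order_relaxed);

        /* spin until our ticket is the one being served */
        while (atomic_load_explicit(&l->owner, memory_order_acquire) != t)
        {
        }
    }

    static void ticket_lock_release(struct ticket_lock *l)
    {
        /* hand the lock to the next ticket in line */
        atomic_fetch_add_explicit(&l->owner, 1, memory_order_release);
    }

Because tickets are granted strictly in order, acquisition is
first-come, first-served, which is what makes the lock fair.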

Signed-off-by: Shell <smokewood@qq.com>

* fixup: RT_CPUS_NR

---------

Signed-off-by: Shell <smokewood@qq.com>
Shell, 11 months ago
parent
commit e25fc8b511

+ 3 - 1
examples/utest/testcases/kernel/sched_sem_tc.c

@@ -29,7 +29,7 @@
 #error the thread priority should at least be greater than idle
 #endif
 
-static rt_atomic_t _star_counter = 1;
+static rt_atomic_t _star_counter;
 static struct rt_semaphore _thr_exit_sem;
 static struct rt_semaphore _level_waiting[TEST_LEVEL_COUNTS];
 static rt_thread_t _thread_matrix[TEST_LEVEL_COUNTS][KERN_TEST_CONCURRENT_THREADS];
@@ -157,6 +157,8 @@ static void scheduler_tc(void)
 static rt_err_t utest_tc_init(void)
 {
     LOG_I("Setup environment...");
+    _star_counter = 1;
+    rt_memset(_load_average, 0, sizeof(_load_average));
     rt_sem_init(&_thr_exit_sem, "test", 0, RT_IPC_FLAG_PRIO);
 
     for (size_t i = 0; i < TEST_LEVEL_COUNTS; i++)

+ 3 - 0
libcpu/Kconfig

@@ -12,6 +12,9 @@ if ARCH_ARMV8 && ARCH_CPU_64BIT
     config ARCH_HAVE_EFFICIENT_UNALIGNED_ACCESS
         bool
         default y
+    config ARCH_USING_GENERIC_CPUID
+        bool "Using generic cpuid implemenation"
+        default n
     endmenu
 endif
 

+ 4 - 0
libcpu/aarch64/common/context_gcc.S

@@ -44,7 +44,11 @@ int rt_hw_cpu_id(void)
 .weak rt_hw_cpu_id
 .type rt_hw_cpu_id, @function
 rt_hw_cpu_id:
+#if RT_CPUS_NR > 1
     mrs x0, tpidr_el1
+#else
+    mov x0, xzr
+#endif
     ret
 
 /*

+ 75 - 38
libcpu/aarch64/common/cpu.c

@@ -8,6 +8,7 @@
  * 2011-09-15     Bernard      first version
  * 2019-07-28     zdzn         add smp support
  * 2023-02-21     GuEe-GUI     mov cpu ofw init to setup
+ * 2024-04-29     Shell        Add generic ticket spinlock using C11 atomic
  */
 
 #include <rthw.h>
@@ -55,65 +56,101 @@ rt_weak rt_uint64_t rt_cpu_mpidr_early[] =
 };
 #endif /* RT_USING_SMART */
 
-static inline void arch_spin_lock(arch_spinlock_t *lock)
+/* in support of C11 atomic */
+#if __STDC_VERSION__ >= 201112L
+#include <stdatomic.h>
+
+union _spinlock
+{
+    _Atomic(rt_uint32_t) _value;
+    struct
+    {
+        _Atomic(rt_uint16_t) owner;
+        _Atomic(rt_uint16_t) next;
+    } ticket;
+};
+
+void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock)
 {
-    unsigned int tmp;
-
-    asm volatile(
-    "   sevl\n"
-    "1: wfe\n"
-    "2: ldaxr   %w0, %1\n"
-    "   cbnz    %w0, 1b\n"
-    "   stxr    %w0, %w2, %1\n"
-    "   cbnz    %w0, 2b\n"
-    : "=&r" (tmp), "+Q" (lock->lock)
-    : "r" (1)
-    : "cc", "memory");
+    union _spinlock *lock = (void *)_lock;
+
+    /**
+     * just a note that this store is atomic, though on arm64 an aligned
+     * 32-bit store always is, even without use of the atomic API
+     */
+    atomic_store_explicit(&lock->_value, 0, memory_order_relaxed);
 }
 
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
+rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *_lock)
 {
-    unsigned int tmp;
-
-    asm volatile(
-    "  ldaxr   %w0, %1\n"
-    "  cbnz    %w0, 1f\n"
-    "  stxr    %w0, %w2, %1\n"
-    "1:\n"
-    : "=&r" (tmp), "+Q" (lock->lock)
-    : "r" (1)
-    : "cc", "memory");
-
-    return !tmp;
+    rt_bool_t rc;
+    rt_uint32_t readonce;
+    union _spinlock temp;
+    union _spinlock *lock = (void *)_lock;
+
+    readonce = atomic_load_explicit(&lock->_value, memory_order_acquire);
+    temp._value = readonce;
+
+    if (temp.ticket.owner != temp.ticket.next)
+    {
+        rc = RT_FALSE;
+    }
+    else
+    {
+        temp.ticket.next += 1;
+        rc = atomic_compare_exchange_strong_explicit(
+            &lock->_value, &readonce, temp._value,
+            memory_order_acquire, memory_order_relaxed);
+    }
+    return rc;
 }
 
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
+rt_inline rt_base_t _load_acq_exclusive(_Atomic(rt_uint16_t) *halfword)
 {
-    asm volatile(
-    " stlr    %w1, %0\n"
-    : "=Q" (lock->lock) : "r" (0) : "memory");
+    rt_uint32_t old;
+    __asm__ volatile("ldaxrh %w0, [%1]"
+                     : "=&r"(old)
+                     : "r"(halfword)
+                     :  "memory");
+    return old;
 }
 
-void rt_hw_spin_lock_init(arch_spinlock_t *lock)
+rt_inline void _send_event_local(void)
 {
-    lock->lock = 0;
+    __asm__ volatile("sevl");
 }
 
-void rt_hw_spin_lock(rt_hw_spinlock_t *lock)
+rt_inline void _wait_for_event(void)
 {
-    arch_spin_lock(lock);
+    __asm__ volatile("wfe" ::: "memory");
 }
 
-void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
+void rt_hw_spin_lock(rt_hw_spinlock_t *_lock)
 {
-    arch_spin_unlock(lock);
+    union _spinlock *lock = (void *)_lock;
+    rt_uint16_t ticket =
+        atomic_fetch_add_explicit(&lock->ticket.next, 1, memory_order_relaxed);
+
+    if (atomic_load_explicit(&lock->ticket.owner, memory_order_acquire) !=
+        ticket)
+    {
+        _send_event_local();
+        do
+        {
+            _wait_for_event();
+        }
+        while (_load_acq_exclusive(&lock->ticket.owner) != ticket);
+    }
 }
 
-rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *lock)
+void rt_hw_spin_unlock(rt_hw_spinlock_t *_lock)
 {
-    return arch_spin_trylock(lock);
+    union _spinlock *lock = (void *)_lock;
+    atomic_fetch_add_explicit(&lock->ticket.owner, 1, memory_order_release);
 }
 
+#endif
+
 static int _cpus_init_data_hardcoded(int num_cpus, rt_uint64_t *cpu_hw_ids, struct cpu_ops_t *cpu_ops[])
 {
     // load in cpu_hw_ids in cpuid_to_hwid,
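
A minimal usage sketch of the new API (assuming an SMP build with
RT_USING_SMP enabled and that <rthw.h> declares the rt_hw_spin_*
functions; the demo_* names and the counter are illustrative only):

    #include <rthw.h>

    static rt_hw_spinlock_t _demo_lock;
    static volatile int _shared_counter;

    void demo_init(void)
    {
        rt_hw_spin_lock_init(&_demo_lock);
    }

    void demo_increment(void)
    {
        rt_hw_spin_lock(&_demo_lock);   /* waiters are served in ticket (FIFO) order */
        _shared_counter++;
        rt_hw_spin_unlock(&_demo_lock);
    }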

+ 135 - 61
libcpu/aarch64/common/cpu_gcc.S

@@ -1,103 +1,177 @@
 /*
- * Copyright (c) 2006-2020, RT-Thread Development Team
+ * Copyright (c) 2006-2024, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Date           Author       Notes
  * 2018-10-06     ZhaoXiaowei    the first version
+ * 2024-04-28     Shell        add generic spinlock implementation
  */
- 
+
 .text
 .globl rt_hw_get_current_el
 rt_hw_get_current_el:
-	MRS		X0, CurrentEL
-	CMP		X0, 0xc
-	B.EQ	3f
-	CMP		X0, 0x8
-	B.EQ	2f
-	CMP		X0, 0x4
-	B.EQ	1f
-	
-	LDR		X0, =0
-	B		0f
+    MRS        X0, CurrentEL
+    CMP        X0, 0xc
+    B.EQ    3f
+    CMP        X0, 0x8
+    B.EQ    2f
+    CMP        X0, 0x4
+    B.EQ    1f
+
+    LDR        X0, =0
+    B        0f
 3:
-	LDR		X0, =3
-	B		0f
+    LDR        X0, =3
+    B        0f
 2:
-	LDR		X0, =2
-	B		0f
+    LDR        X0, =2
+    B        0f
 1:
-	LDR		X0, =1
-	B		0f
+    LDR        X0, =1
+    B        0f
 0:
-	RET
+    RET
 
 
 .globl rt_hw_set_current_vbar
 rt_hw_set_current_vbar:
-	MRS		X1, CurrentEL
-	CMP		X1, 0xc
-	B.EQ	3f
-	CMP		X1, 0x8
-	B.EQ	2f
-	CMP		X1, 0x4
-	B.EQ	1f
-	B		0f
+    MRS        X1, CurrentEL
+    CMP        X1, 0xc
+    B.EQ    3f
+    CMP        X1, 0x8
+    B.EQ    2f
+    CMP        X1, 0x4
+    B.EQ    1f
+    B        0f
 3:
-	MSR		VBAR_EL3,X0
-	B		0f
+    MSR        VBAR_EL3,X0
+    B        0f
 2:
-	MSR		VBAR_EL2,X0
-	B		0f
+    MSR        VBAR_EL2,X0
+    B        0f
 1:
-	MSR		VBAR_EL1,X0
-	B		0f
+    MSR        VBAR_EL1,X0
+    B        0f
 0:
-	RET
+    RET
 
 .globl rt_hw_set_elx_env
 rt_hw_set_elx_env:
-	MRS		X1, CurrentEL
-	CMP		X1, 0xc
-	B.EQ	3f
-	CMP		X1, 0x8
-	B.EQ	2f
-	CMP		X1, 0x4
-	B.EQ	1f
-	B		0f
+    MRS        X1, CurrentEL
+    CMP        X1, 0xc
+    B.EQ    3f
+    CMP        X1, 0x8
+    B.EQ    2f
+    CMP        X1, 0x4
+    B.EQ    1f
+    B        0f
 3:
-	MRS		X0, SCR_EL3
-	ORR		X0, X0, #0xF			/* SCR_EL3.NS|IRQ|FIQ|EA */
-	MSR		SCR_EL3, X0
-	B		0f
+    MRS        X0, SCR_EL3
+    ORR        X0, X0, #0xF            /* SCR_EL3.NS|IRQ|FIQ|EA */
+    MSR        SCR_EL3, X0
+    B        0f
 2:
-	MRS	X0, HCR_EL2
-	ORR	X0, X0, #0x38
-	MSR	HCR_EL2, X0
-	B		0f
+    MRS    X0, HCR_EL2
+    ORR    X0, X0, #0x38
+    MSR    HCR_EL2, X0
+    B        0f
 1:
-	B		0f
+    B        0f
 0:
-	RET
+    RET
 
-.global rt_cpu_vector_set_base
+.globl rt_cpu_vector_set_base
 rt_cpu_vector_set_base:
-	MSR		VBAR_EL1,X0
+    MSR        VBAR_EL1,X0
     RET
 
 
 /**
  * unsigned long rt_hw_ffz(unsigned long x)
  */
-.global rt_hw_ffz
+.globl rt_hw_ffz
 rt_hw_ffz:
-    mvn x1, x0
-    clz x0, x1
-    mov x1, #0x3f
-    sub x0, x1, x0
+    mvn     x1, x0
+    clz     x0, x1
+    mov     x1, #0x3f
+    sub     x0, x1, x0
     ret
 
-.global rt_hw_clz
+.globl rt_hw_clz
 rt_hw_clz:
-    clz x0, x0
+    clz     x0, x0
+    ret
+
+/**
+ * Spinlock (fallback implementation)
+ */
+
+rt_hw_spin_lock_init:
+    .weak   rt_hw_spin_lock_init
+    stlr    wzr, [x0]
+    ret
+
+rt_hw_spin_trylock:
+    .weak   rt_hw_spin_trylock
+    sub     sp, sp, #16
+    ldar    w2, [x0]
+    add     x1, sp, 8
+    stlr    w2, [x1]
+    ldarh   w1, [x1]
+    and     w1, w1, 65535
+    add     x3, sp, 10
+    ldarh   w3, [x3]
+    cmp     w1, w3, uxth
+    beq     1f
+    mov     w0, 0
+    add     sp, sp, 16
+    ret
+1:
+    add     x1, sp, 10
+2:
+    ldaxrh  w3, [x1]
+    add     w3, w3, 1
+    stlxrh  w4, w3, [x1]
+    cbnz    w4, 2b
+    add     x1, sp, 8
+    ldar    w1, [x1]
+3:
+    ldaxr   w3, [x0]
+    cmp     w3, w2
+    bne     4f
+    stxr    w4, w1, [x0]
+    cbnz    w4, 3b
+4:
+    cset    w0, eq
+    add     sp, sp, 16
+    ret
+
+rt_hw_spin_lock:
+    .weak   rt_hw_spin_lock
+    add     x1, x0, 2
+1:
+    ldxrh   w2, [x1]
+    add     w3, w2, 1
+    stxrh   w4, w3, [x1]
+    cbnz    w4, 1b
+    and     w2, w2, 65535
+    ldarh   w1, [x0]
+    cmp     w2, w1, uxth
+    beq     3f
+    sevl
+2:
+    wfe
+    ldaxrh  w1, [x0]
+    cmp     w2, w1
+    bne     2b
+3:
+    ret
+
+rt_hw_spin_unlock:
+    .weak   rt_hw_spin_unlock
+    ldxrh   w1, [x0]
+    add     w1, w1, 1
+    stlxrh  w2, w1, [x0]
+    cbnz    w2, rt_hw_spin_unlock
     ret

+ 35 - 3
libcpu/aarch64/common/cpuport.h

@@ -17,10 +17,42 @@
 #include <rtdef.h>
 
 #ifdef RT_USING_SMP
-typedef struct {
-    volatile unsigned int lock;
+
+/**
+ * Spinlock
+ */
+
+typedef struct
+{
+    rt_uint32_t value;
 } rt_hw_spinlock_t;
-#endif
+
+/**
+ * Generic hw-cpu-id
+ */
+#ifdef ARCH_USING_GENERIC_CPUID
+
+#if RT_CPUS_NR > 1
+
+rt_inline int rt_hw_cpu_id(void)
+{
+    long cpuid;
+    __asm__ volatile("mrs %0, tpidr_el1":"=r"(cpuid));
+    return cpuid;
+}
+
+#else
+
+rt_inline int rt_hw_cpu_id(void)
+{
+    return 0;
+}
+
+#endif /* RT_CPUS_NR > 1 */
+
+#endif /* ARCH_USING_GENERIC_CPUID */
+
+#endif /* RT_USING_SMP */
 
 #define rt_hw_barrier(cmd, ...) \
     __asm__ volatile (RT_STRINGIFY(cmd) " "RT_STRINGIFY(__VA_ARGS__):::"memory")

+ 1 - 1
src/cpu_mp.c

@@ -146,7 +146,7 @@ rt_base_t rt_cpus_lock(void)
     pcpu = rt_cpu_self();
     if (pcpu->current_thread != RT_NULL)
     {
-        register rt_ubase_t lock_nest = rt_atomic_load(&(pcpu->current_thread->cpus_lock_nest));
+        rt_ubase_t lock_nest = rt_atomic_load(&(pcpu->current_thread->cpus_lock_nest));
 
         rt_atomic_add(&(pcpu->current_thread->cpus_lock_nest), 1);
         if (lock_nest == 0)

+ 1 - 0
src/scheduler_mp.c

@@ -1089,6 +1089,7 @@ void rt_exit_critical_safe(rt_base_t critical_level)
 
 void rt_exit_critical_safe(rt_base_t critical_level)
 {
+    RT_UNUSED(critical_level);
     return rt_exit_critical();
 }
 

+ 2 - 1
src/thread.c

@@ -363,7 +363,8 @@ rt_thread_t rt_thread_self(void)
     self = rt_cpu_self()->current_thread;
     rt_hw_local_irq_enable(lock);
     return self;
-#else
+
+#else /* !RT_USING_SMP */
     extern rt_thread_t rt_current_thread;
 
     return rt_current_thread;