
[libcpu/arm64] feat: Trimming General Context

This patch trims the ARM64 general context handling code. The switch
context now carries only the callee-saved registers that the AAPCS64
calling convention requires, shrinking the switch frame and making
context save/restore cheaper.

Changes include:
- Aligned the thread stack to 16 bytes in `arch_set_thread_context` and
  rebuilt the thread frame via `rt_hw_stack_init`.
- Updated `lwp_gcc.S` to zero the frame pointer and link register on
  syscall entry, so backtraces terminate cleanly.
- Refined `rt_hw_backtrace_frame_unwind` to validate user-space frame
  addresses before following them.
- Added `GET_THREAD_SELF` macro in `asm-generic.h`.
- Simplified context saving/restoring in `context_gcc.h` and related files.
- Optimized `rt_hw_context_switch_interrupt` and related assembly routines.

Signed-off-by: Shell <smokewood@qq.com>
commit 7138f340b2

+ 4 - 6
components/lwp/arch/aarch64/cortex-a/lwp_arch.c

@@ -106,18 +106,16 @@ int arch_set_thread_context(void (*exit)(void), void *new_thread_stack,
     struct rt_hw_exp_stack *ori_syscall = rt_thread_self()->user_ctx.ctx;
     RT_ASSERT(ori_syscall != RT_NULL);
 
-    thread_frame = (void *)((long)new_thread_stack - sizeof(struct rt_hw_exp_stack));
-    syscall_frame = (void *)((long)new_thread_stack - 2 * sizeof(struct rt_hw_exp_stack));
+    new_thread_stack = (rt_ubase_t*)RT_ALIGN_DOWN((rt_ubase_t)new_thread_stack, 16);
 
+    syscall_frame = (void *)((long)new_thread_stack - sizeof(struct rt_hw_exp_stack));
     memcpy(syscall_frame, ori_syscall, sizeof(*syscall_frame));
     syscall_frame->sp_el0 = (long)user_stack;
     syscall_frame->x0 = 0;
 
-    thread_frame->cpsr = ((3 << 6) | 0x4 | 0x1);
-    thread_frame->pc = (long)exit;
-    thread_frame->x0 = 0;
+    thread_frame = (void *)rt_hw_stack_init(exit, RT_NULL, (void *)syscall_frame, RT_NULL);
 
-    *thread_sp = syscall_frame;
+    *thread_sp = thread_frame;
 
     return 0;
 }
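
For orientation, the stack layout this now produces (my reading of the diff; exact sizes depend on struct rt_hw_exp_stack):

    /*
     *   new_thread_stack, RT_ALIGN_DOWN(..., 16)        <- high address
     *   [ struct rt_hw_exp_stack: copy of the parent's syscall frame,
     *     with x0 = 0 (the child's return value) and sp_el0 = user_stack ]
     *                                                   <- syscall_frame
     *   [ switch frame built by rt_hw_stack_init(exit, RT_NULL,
     *     syscall_frame, RT_NULL) ]
     *                                                   <- thread_frame == *thread_sp
     */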

+ 4 - 1
components/lwp/arch/aarch64/cortex-a/lwp_gcc.S

@@ -125,12 +125,15 @@ lwp_exec_user:
 * since this routine resets the SP, we take it as a starting point
  */
 START_POINT(SVC_Handler)
+    mov fp, xzr
+    mov lr, xzr
+
     /* x0 is initial sp */
     mov sp, x0
 
     msr daifclr, #3  /* enable interrupt */
 
-    bl  rt_thread_self
+    GET_THREAD_SELF x0
     bl  lwp_user_setting_save
 
     ldp x8, x9, [sp, #(CONTEXT_OFFSET_X8)]
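
Zeroing fp and lr at the syscall entry point is what lets a backtrace taken inside a syscall terminate: an AAPCS64 frame-pointer walk stops at a NULL frame record. A minimal sketch in C, assuming the standard {fp, lr} record layout:

    #include <stdint.h>

    struct frame_record { uintptr_t fp, lr; };  /* AAPCS64 frame record */

    static int unwind_depth(uintptr_t fp, int limit)
    {
        int depth = 0;
        /* the "mov fp, xzr" above guarantees this loop ends cleanly */
        while (fp && !(fp & 0x7) && depth < limit)
        {
            const struct frame_record *rec = (const struct frame_record *)fp;
            /* rec->lr would be recorded as the caller's pc here */
            fp = rec->fp;
            depth++;
        }
        return depth;
    }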

+ 4 - 3
libcpu/aarch64/common/backtrace.c

@@ -85,11 +85,12 @@ rt_err_t rt_hw_backtrace_frame_unwind(rt_thread_t thread, struct rt_hw_backtrace
     if (fp && !((long)fp & 0x7))
     {
 #ifdef RT_USING_SMART
+#define IN_USER_SPACE(addr) ((rt_ubase_t)(addr) >= USER_VADDR_START && (rt_ubase_t)(addr) < USER_VADDR_TOP)
         if (thread && thread->lwp && rt_scheduler_is_available())
         {
             rt_lwp_t lwp = thread->lwp;
             void *this_lwp = lwp_self();
-            if (this_lwp == lwp && rt_kmem_v2p(fp) != ARCH_MAP_FAILED)
+            if ((!IN_USER_SPACE(fp) || this_lwp == lwp) && rt_kmem_v2p(fp) != ARCH_MAP_FAILED)
             {
                 rc = _bt_kaddr(fp, frame);
             }
@@ -129,8 +130,8 @@ rt_err_t rt_hw_backtrace_frame_get(rt_thread_t thread, struct rt_hw_backtrace_fr
     }
     else
     {
-        frame->pc = ARCH_CONTEXT_FETCH(thread->sp, 3);
-        frame->fp = ARCH_CONTEXT_FETCH(thread->sp, 7);
+        frame->pc = ARCH_CONTEXT_FETCH(thread->sp, 0);
+        frame->fp = ARCH_CONTEXT_FETCH(thread->sp, 4);
         rc = RT_EOK;
     }
     return rc;
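
The new fetch indices follow from the trimmed switch frame (see context_gcc.h below): 8-byte slot 0 holds the saved pc (x30, restored into ELR_EL1) and slot 4 holds x29. A sketch, assuming ARCH_CONTEXT_FETCH simply reads slot n from the saved sp:

    /* hypothetical definition, for illustration only */
    #define ARCH_CONTEXT_FETCH(sp, n) (*(((unsigned long *)(sp)) + (n)))
    /* slot 0: x30 (pc at resume); slot 4: x29 (frame pointer) */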

+ 11 - 0
libcpu/aarch64/common/include/asm-generic.h

@@ -23,4 +23,15 @@
     .cfi_endproc;               \
     .size name, .-name;
 
+.macro GET_THREAD_SELF, dst:req
+#ifdef ARCH_USING_HW_THREAD_SELF
+    mrs     x0, tpidr_el1
+#else  /* !ARCH_USING_HW_THREAD_SELF */
+    bl      rt_thread_self
+#endif /* ARCH_USING_HW_THREAD_SELF */
+    .ifnc   \dst, x0
+    mov     \dst, x0
+    .endif
+.endm
+
 #endif /* __ASM_GENERIC_H__ */
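
For readers on the C side, a rough equivalent of what the macro does, assuming ARCH_USING_HW_THREAD_SELF means the current thread pointer is cached in TPIDR_EL1 (the helper name here is made up):

    #include <rtthread.h>

    static inline rt_thread_t hw_thread_self(void)
    {
    #ifdef ARCH_USING_HW_THREAD_SELF
        rt_thread_t tid;
        __asm__ volatile ("mrs %0, tpidr_el1" : "=r"(tid));
        return tid;
    #else
        return rt_thread_self();  /* one extra bl, as before this patch */
    #endif
    }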

+ 46 - 52
libcpu/aarch64/common/include/context_gcc.h

@@ -10,67 +10,61 @@
 #ifndef __ARM64_INC_CONTEXT_H__
 #define __ARM64_INC_CONTEXT_H__
 
-.macro SAVE_CONTEXT_SWITCH
+#include "armv8.h"
+
+.macro SAVE_CONTEXT_SWITCH, tmpx, tmp2x
     /* Save the entire context. */
     SAVE_FPU sp
-    stp     x0, x1, [sp, #-0x10]!
-    stp     x2, x3, [sp, #-0x10]!
-    stp     x4, x5, [sp, #-0x10]!
-    stp     x6, x7, [sp, #-0x10]!
-    stp     x8, x9, [sp, #-0x10]!
-    stp     x10, x11, [sp, #-0x10]!
-    stp     x12, x13, [sp, #-0x10]!
-    stp     x14, x15, [sp, #-0x10]!
-    stp     x16, x17, [sp, #-0x10]!
-    stp     x18, x19, [sp, #-0x10]!
-    stp     x20, x21, [sp, #-0x10]!
-    stp     x22, x23, [sp, #-0x10]!
-    stp     x24, x25, [sp, #-0x10]!
-    stp     x26, x27, [sp, #-0x10]!
-    stp     x28, x29, [sp, #-0x10]!
-    mrs     x28, fpcr
-    mrs     x29, fpsr
-    stp     x28, x29, [sp, #-0x10]!
-    mrs     x29, sp_el0
-    stp     x29, x30, [sp, #-0x10]!
+
+    stp     x19, x20, [sp, #-0x10]!
+    stp     x21, x22, [sp, #-0x10]!
+    stp     x23, x24, [sp, #-0x10]!
+    stp     x25, x26, [sp, #-0x10]!
+    stp     x27, x28, [sp, #-0x10]!
+
+    mrs     \tmpx, sp_el0
+    stp     x29, \tmpx, [sp, #-0x10]!
+
+    mrs     \tmpx, fpcr
+    mrs     \tmp2x, fpsr
+    stp     \tmpx, \tmp2x, [sp, #-0x10]!
+
+    mov     \tmpx, #((3 << 6) | 0x5)    /* el1h, disable interrupt */
+    stp     x30, \tmpx, [sp, #-0x10]!
+
+.endm
+
+.macro SAVE_CONTEXT_SWITCH_FAST
+    /* Save the entire context. */
+    add     sp, sp, #-1 * CONTEXT_FPU_SIZE
+
+    add     sp, sp, #-7 * 16
 
     mov     x19, #((3 << 6) | 0x4 | 0x1)  /* el1h, disable interrupt */
-    mov     x18, x30
+    stp     lr, x19, [sp, #-0x10]!
 
-    stp     x18, x19, [sp, #-0x10]!
 .endm
 
 .macro _RESTORE_CONTEXT_SWITCH
-    ldp     x2, x3, [sp], #0x10  /* SPSR and ELR. */
-
-    tst     x3, #0x1f
-    msr     spsr_el1, x3
-    msr     elr_el1, x2
-
-    ldp     x29, x30, [sp], #0x10
-    msr     sp_el0, x29
-    ldp     x28, x29, [sp], #0x10
-    msr     fpcr, x28
-    msr     fpsr, x29
-    ldp     x28, x29, [sp], #0x10
-    ldp     x26, x27, [sp], #0x10
-    ldp     x24, x25, [sp], #0x10
-    ldp     x22, x23, [sp], #0x10
-    ldp     x20, x21, [sp], #0x10
-    ldp     x18, x19, [sp], #0x10
-    ldp     x16, x17, [sp], #0x10
-    ldp     x14, x15, [sp], #0x10
-    ldp     x12, x13, [sp], #0x10
-    ldp     x10, x11, [sp], #0x10
-    ldp     x8, x9, [sp], #0x10
-    ldp     x6, x7, [sp], #0x10
-    ldp     x4, x5, [sp], #0x10
-    ldp     x2, x3, [sp], #0x10
-    ldp     x0, x1, [sp], #0x10
+    ldp     x30, x19, [sp], #0x10  /* ELR and SPSR */
+    msr     elr_el1, x30
+    msr     spsr_el1, x19
+
+    /* restore FPU control/status registers */
+    ldp     x19, x20, [sp], #0x10
+    msr     fpcr, x19
+    msr     fpsr, x20
+
+    ldp     x29, x19, [sp], #0x10
+    msr     sp_el0, x19
+    ldp     x27, x28, [sp], #0x10
+    ldp     x25, x26, [sp], #0x10
+    ldp     x23, x24, [sp], #0x10
+    ldp     x21, x22, [sp], #0x10
+    ldp     x19, x20, [sp], #0x10
+
     RESTORE_FPU sp
-#ifdef RT_USING_SMART
-    beq     arch_ret_to_user
-#endif
     eret
 .endm
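
Reconstructed as a C struct (my reading of the save/restore order above, lowest address first), the trimmed frame holds 8 register pairs, i.e. 128 bytes of GPR/system state, where by my count the old macros moved 18 pairs (288 bytes). Slots 0 and 4 are the ones backtrace.c now fetches:

    /* sketch of the switch frame exactly as _RESTORE_CONTEXT_SWITCH pops it */
    struct switch_frame
    {
        unsigned long pc;          /* x30, restored into ELR_EL1 (slot 0) */
        unsigned long spsr;        /* restored into SPSR_EL1              */
        unsigned long fpcr, fpsr;  /* FPU control/status                  */
        unsigned long fp;          /* x29 (slot 4)                        */
        unsigned long sp_el0;      /* user stack pointer                  */
        unsigned long x27, x28;
        unsigned long x25, x26;
        unsigned long x23, x24;
        unsigned long x21, x22;
        unsigned long x19, x20;    /* only callee-saved registers remain  */
        /* the SAVE_FPU/RESTORE_FPU area sits above, at higher addresses  */
    };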
 

+ 46 - 22
libcpu/aarch64/common/mp/context_gcc.S

@@ -35,13 +35,17 @@
 rt_hw_context_switch_to:
     ldr     x0, [x0]
     mov     sp, x0
-    mov     x0, x1
+
+    /* preserve to_thread across the calls below */
+    mov     x19, x1
+
+    mov     x0, x19
     bl      rt_cpus_lock_status_restore
 #ifdef RT_USING_SMART
-    bl      rt_thread_self
+    mov     x0, x19
     bl      lwp_user_setting_restore
 #endif
-    b       rt_hw_context_switch_exit
+    b       _context_switch_exit
 
 .globl rt_hw_context_switch
 
@@ -53,7 +57,7 @@ to, struct rt_thread *to_thread);
  * X2 --> to_thread
  */
 rt_hw_context_switch:
-    SAVE_CONTEXT_SWITCH
+    SAVE_CONTEXT_SWITCH x19, x20
     mov     x3, sp
     str     x3, [x0]            // store sp in preempted tasks TCB
     ldr     x0, [x1]            // get new task stack pointer
@@ -68,10 +72,15 @@ rt_hw_context_switch:
     mov     x0, x19
     bl      lwp_user_setting_restore
 #endif
-    b       rt_hw_context_switch_exit
+    b       _context_switch_exit
 
+.globl rt_hw_irq_exit
 .globl rt_hw_context_switch_interrupt
 
+#define EXP_FRAME   x19
+#define FROM_SPP    x20
+#define TO_SPP      x21
+#define TO_TCB      x22
 /*
  * void rt_hw_context_switch_interrupt(context, from sp, to sp, tp tcb)
  * X0 :interrupt context
@@ -80,30 +89,45 @@ rt_hw_context_switch:
  * X3 :to_thread's tcb
  */
 rt_hw_context_switch_interrupt:
-    stp     x0, x1, [sp, #-0x10]!
-    stp     x2, x3, [sp, #-0x10]!
+#ifdef RT_USING_DEBUG
+    /* debug frame for backtrace */
     stp     x29, x30, [sp, #-0x10]!
+#endif /* RT_USING_DEBUG */
+
+    /* this routine never returns to its caller, so the previous ABI
+     * state can be discarded; stash the arguments in callee-saved regs */
+    mov     EXP_FRAME, x0
+    mov     FROM_SPP, x1
+    mov     TO_SPP, x2
+    mov     TO_TCB, x3
+
 #ifdef RT_USING_SMART
-    bl      rt_thread_self
+    GET_THREAD_SELF x0
     bl      lwp_user_setting_save
-#endif
-    ldp     x29, x30, [sp], #0x10
-    ldp     x2, x3, [sp], #0x10
-    ldp     x0, x1, [sp], #0x10
-    str     x0, [x1]
-    ldr     x0, [x2]
+#endif /* RT_USING_SMART */
+
+    /* reset SP of from-thread */
+    mov     sp, EXP_FRAME
+
+    /* push context for switch */
+    adr     lr, rt_hw_irq_exit
+    SAVE_CONTEXT_SWITCH_FAST
+
+    /* save SP of from-thread */
+    mov     x0, sp
+    str     x0, [FROM_SPP]
+
+    /* switch SP to the to-thread's stack */
+    ldr     x0, [TO_SPP]
     mov     sp, x0
-    mov     x0, x3
-    mov     x19, x0
+
+    mov     x0, TO_TCB
     bl      rt_cpus_lock_status_restore
-    mov     x0, x19
 #ifdef RT_USING_SMART
+    mov     x0, TO_TCB
     bl      lwp_user_setting_restore
-#endif
-    b       rt_hw_context_switch_exit
+#endif /* RT_USING_SMART */
+    b       _context_switch_exit
 
-.global rt_hw_context_switch_exit
-rt_hw_context_switch_exit:
+_context_switch_exit:
     clrex
-    mov     x0, sp
     RESTORE_CONTEXT_SWITCH
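
The flow above is subtle enough to merit a model. A hedged C sketch (SWITCH_FRAME_SIZE is hypothetical; the key trick is that the fast save leaves the callee-saved slots uninitialized because the exception frame pushed by vector_irq still holds the live values, and rt_hw_irq_exit restores from that frame after eret lands on it):

    #include <stdint.h>

    /* hypothetical: 8 GPR pairs; the FPU area would be added on top */
    #define SWITCH_FRAME_SIZE (8 * 16)

    /* conceptual model only; the real work is the assembly above */
    static void model_switch_interrupt(uintptr_t exp_frame,
                                       uintptr_t *from_spp, uintptr_t *to_spp)
    {
        uintptr_t sp = exp_frame;  /* reset SP onto the exception frame */
        sp -= SWITCH_FRAME_SIZE;   /* SAVE_CONTEXT_SWITCH_FAST: reserve the
                                      frame but store only lr = rt_hw_irq_exit
                                      and a fresh SPSR */
        *from_spp = sp;            /* park the from-thread */
        sp = *to_spp;              /* adopt the to-thread's saved frame; the
                                      restore pops it and erets */
        (void)sp;
    }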

+ 0 - 6
libcpu/aarch64/common/mp/context_gcc.h

@@ -20,16 +20,10 @@
 #include <armv8.h>
 
 .macro RESTORE_CONTEXT_SWITCH
-    /* Set the SP to point to the stack of the task being restored. */
-    mov     sp, x0
-
     _RESTORE_CONTEXT_SWITCH
 .endm
 
 .macro RESTORE_IRQ_CONTEXT
-    /* Set the SP to point to the stack of the task being restored. */
-    mov     sp, x0
-
     ldp     x2, x3, [sp], #0x10  /* SPSR and ELR. */
 
     tst     x3, #0x1f

+ 5 - 1
libcpu/aarch64/common/mp/vector_gcc.S

@@ -15,10 +15,14 @@
 #include "../include/vector_gcc.h"
 #include "context_gcc.h"
 
+.section .text
+
 .globl vector_fiq
 vector_fiq:
     b       .
 
+.globl rt_hw_irq_exit
+
 START_POINT(vector_irq)
     SAVE_IRQ_CONTEXT
     stp     x0, x1, [sp, #-0x10]!   /* X0 is thread sp */
@@ -42,7 +46,7 @@ START_POINT(vector_irq)
     ldp     x0, x1, [sp], #0x10
     bl      rt_scheduler_do_irq_switch
 
-    mov     x0, sp
+rt_hw_irq_exit:
     RESTORE_IRQ_CONTEXT
 
 START_POINT_END(vector_irq)

+ 16 - 38
libcpu/aarch64/common/stack.c

@@ -41,44 +41,22 @@ rt_uint8_t *rt_hw_stack_init(void *tentry, void *parameter,
         *(rt_uint128_t *)stk = (rt_uint128_t) { 0 };
     }
 
-    *(--stk) = (rt_ubase_t)0;           /* X1 */
-    *(--stk) = (rt_ubase_t)parameter;   /* X0 */
-    *(--stk) = (rt_ubase_t)3;           /* X3 */
-    *(--stk) = (rt_ubase_t)2;           /* X2 */
-    *(--stk) = (rt_ubase_t)5;           /* X5 */
-    *(--stk) = (rt_ubase_t)4;           /* X4 */
-    *(--stk) = (rt_ubase_t)7;           /* X7 */
-    *(--stk) = (rt_ubase_t)6;           /* X6 */
-    *(--stk) = (rt_ubase_t)9;           /* X9 */
-    *(--stk) = (rt_ubase_t)8;           /* X8 */
-    *(--stk) = (rt_ubase_t)11;          /* X11 */
-    *(--stk) = (rt_ubase_t)10;          /* X10 */
-    *(--stk) = (rt_ubase_t)13;          /* X13 */
-    *(--stk) = (rt_ubase_t)12;          /* X12 */
-    *(--stk) = (rt_ubase_t)15;          /* X15 */
-    *(--stk) = (rt_ubase_t)14;          /* X14 */
-    *(--stk) = (rt_ubase_t)17;          /* X17 */
-    *(--stk) = (rt_ubase_t)16;          /* X16 */
-    *(--stk) = (rt_ubase_t)tentry;      /* X19, 1st param */
-    *(--stk) = (rt_ubase_t)18;          /* X18 */
-    *(--stk) = (rt_ubase_t)21;          /* X21 */
-    *(--stk) = (rt_ubase_t)texit;       /* X20, 2nd param */
-    *(--stk) = (rt_ubase_t)23;          /* X23 */
-    *(--stk) = (rt_ubase_t)22;          /* X22 */
-    *(--stk) = (rt_ubase_t)25;          /* X25 */
-    *(--stk) = (rt_ubase_t)24;          /* X24 */
-    *(--stk) = (rt_ubase_t)27;          /* X27 */
-    *(--stk) = (rt_ubase_t)26;          /* X26 */
-    *(--stk) = (rt_ubase_t)0;           /* X29 - addr 0 as AAPCS64 specified */
-    *(--stk) = (rt_ubase_t)28;          /* X28 */
-    *(--stk) = (rt_ubase_t)0;           /* FPSR */
-    *(--stk) = (rt_ubase_t)0;           /* FPCR */
-    *(--stk) = (rt_ubase_t)0;           /* X30 - procedure call link register. */
-    *(--stk) = (rt_ubase_t)0;           /* sp_el0 */
-
-    *(--stk) = INITIAL_SPSR_EL1;
-
-    *(--stk) = (rt_ubase_t)_thread_start; /* Exception return address. */
+    *(--stk) = (rt_ubase_t)texit;           /* X20, 2nd param */
+    *(--stk) = (rt_ubase_t)tentry;          /* X19, 1st param */
+    *(--stk) = (rt_ubase_t)22;              /* X22 */
+    *(--stk) = (rt_ubase_t)parameter;       /* X21, 3rd param */
+    *(--stk) = (rt_ubase_t)24;              /* X24 */
+    *(--stk) = (rt_ubase_t)23;              /* X23 */
+    *(--stk) = (rt_ubase_t)26;              /* X26 */
+    *(--stk) = (rt_ubase_t)25;              /* X25 */
+    *(--stk) = (rt_ubase_t)28;              /* X28 */
+    *(--stk) = (rt_ubase_t)27;              /* X27 */
+    *(--stk) = (rt_ubase_t)0;               /* sp_el0 */
+    *(--stk) = (rt_ubase_t)0;               /* X29 - addr 0 as AAPCS64 specified */
+    *(--stk) = (rt_ubase_t)0;               /* FPSR */
+    *(--stk) = (rt_ubase_t)0;               /* FPCR */
+    *(--stk) = INITIAL_SPSR_EL1;            /* SPSR_EL1: initial processor state */
+    *(--stk) = (rt_ubase_t)_thread_start;   /* Exception return address. */
 
     /* return task's current stack address */
     return (rt_uint8_t *)stk;
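
The 16 slots pushed here mirror, pair for pair, what _RESTORE_CONTEXT_SWITCH pops, so the first resume erets straight into _thread_start. The correspondence, low address to high (my annotation):

    /*
     *   _thread_start, SPSR      ->  ldp x30, x19  (ELR, SPSR)
     *   FPCR, FPSR               ->  ldp x19, x20  (fpcr, fpsr)
     *   x29 = 0, sp_el0 = 0      ->  ldp x29, x19  (fp, sp_el0)
     *   x27/x28, x25/x26, x23/x24 pairs
     *   parameter, x22           ->  ldp x21, x22  (x21 = 3rd start arg)
     *   tentry, texit            ->  ldp x19, x20  (1st and 2nd start args)
     */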

+ 1 - 0
libcpu/aarch64/common/stack_gcc.S

@@ -21,6 +21,7 @@
 .section .text
 
 START_POINT(_thread_start)
+    mov     x0, x21
     blr     x19
     mov     x29, #0
     blr     x20
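
In C terms the startup sequence now reads as below (register mapping per the init frame in stack.c: x19 = tentry, x20 = texit, x21 = parameter; zeroing x29 makes a backtrace out of the exit path terminate):

    /* model of _thread_start; names are illustrative */
    static void thread_start_model(void (*tentry)(void *), void (*texit)(void),
                                   void *parameter)
    {
        tentry(parameter);  /* mov x0, x21 ; blr x19 */
        texit();            /* mov x29, #0 ; blr x20 */
    }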

+ 27 - 7
libcpu/aarch64/common/up/context_gcc.S

@@ -44,7 +44,7 @@ rt_thread_switch_interrupt_flag:
 rt_hw_context_switch_to:
     clrex
     ldr     x0, [x0]
-    RESTORE_CONTEXT_SWITCH
+    RESTORE_CONTEXT_SWITCH x0
 
 /*
  * void rt_hw_context_switch(rt_ubase_t from, rt_ubase_t to);
@@ -55,23 +55,23 @@ rt_hw_context_switch_to:
 .globl rt_hw_context_switch
 rt_hw_context_switch:
     clrex
-    SAVE_CONTEXT_SWITCH
+    SAVE_CONTEXT_SWITCH x19, x20
 
     mov    x2, sp
     str    x2, [x0]            // store sp in preempted tasks TCB
     ldr    x0, [x1]            // get new task stack pointer
 
-    RESTORE_CONTEXT_SWITCH
+    RESTORE_CONTEXT_SWITCH x0
 
-/*
- * void rt_hw_context_switch_interrupt(rt_ubase_t from, rt_ubase_t to, rt_thread_t from_thread, rt_thread_t to_thread);
- */
 .globl rt_thread_switch_interrupt_flag
 .globl rt_interrupt_from_thread
 .globl rt_interrupt_to_thread
 .globl rt_hw_context_switch_interrupt
+
+/*
+ * void rt_hw_context_switch_interrupt(rt_ubase_t from, rt_ubase_t to, rt_thread_t from_thread, rt_thread_t to_thread);
+ */
 rt_hw_context_switch_interrupt:
-    clrex
     ldr     x6, =rt_thread_switch_interrupt_flag
     ldr     x7, [x6]
     cmp     x7, #1
@@ -95,3 +95,23 @@ _reswitch:
     ldr     x6, =rt_interrupt_to_thread     // set rt_interrupt_to_thread
     str     x1, [x6]
     ret
+
+.globl rt_hw_context_switch_interrupt_do
+
+/**
+ * rt_hw_context_switch_interrupt_do(void)
+ */
+rt_hw_context_switch_interrupt_do:
+    clrex
+    SAVE_CONTEXT_SWITCH_FAST
+
+    ldr     x3,  =rt_interrupt_from_thread
+    ldr     x4,  [x3]
+    mov     x0,  sp
+    str     x0,  [x4]       // store sp in preempted task's tcb
+
+    ldr     x3,  =rt_interrupt_to_thread
+    ldr     x4,  [x3]
+    ldr     x0,  [x4]       // get new task's stack pointer
+
+    RESTORE_CONTEXT_SWITCH x0
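
On UP, rt_hw_context_switch_interrupt now only records the request, and the new rt_hw_context_switch_interrupt_do performs it from the IRQ tail. A sketch of the protocol, assuming the two globals hold the address of each thread's saved-sp field:

    #include <stdint.h>

    extern volatile unsigned long rt_thread_switch_interrupt_flag;
    extern uintptr_t rt_interrupt_from_thread;  /* &from_thread->sp */
    extern uintptr_t rt_interrupt_to_thread;    /* &to_thread->sp   */

    /* recording side (rt_hw_context_switch_interrupt above) */
    static void model_record_switch(uintptr_t from_spp, uintptr_t to_spp)
    {
        if (rt_thread_switch_interrupt_flag != 1)
        {
            rt_thread_switch_interrupt_flag = 1;
            rt_interrupt_from_thread = from_spp;  /* first preemption wins */
        }
        rt_interrupt_to_thread = to_spp;          /* always retarget       */
    }

    /* performing side (rt_hw_context_switch_interrupt_do above) */
    static void model_do_switch(uintptr_t sp_after_fast_save)
    {
        *(uintptr_t *)rt_interrupt_from_thread = sp_after_fast_save;
        uintptr_t new_sp = *(uintptr_t *)rt_interrupt_to_thread;
        (void)new_sp;  /* RESTORE_CONTEXT_SWITCH then pops it and erets */
    }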

+ 2 - 4
libcpu/aarch64/common/up/context_gcc.h

@@ -19,9 +19,9 @@
 #include <asm-fpu.h>
 #include <armv8.h>
 
-.macro RESTORE_CONTEXT_SWITCH
+.macro RESTORE_CONTEXT_SWITCH using_sp
     /* Set the SP to point to the stack of the task being restored. */
-    mov     sp, x0
+    mov     sp, \using_sp
 
 #ifdef RT_USING_SMART
     bl      rt_thread_self
@@ -34,8 +34,6 @@
 .endm
 
 .macro RESTORE_IRQ_CONTEXT
-    /* Set the SP to point to the stack of the task being restored. */
-    MOV     SP, X0
 #ifdef RT_USING_SMART
     BL      rt_thread_self
     MOV     X19, X0

+ 2 - 14
libcpu/aarch64/common/up/vector_gcc.S

@@ -26,9 +26,7 @@
     .globl vector_fiq
 vector_fiq:
     SAVE_IRQ_CONTEXT
-    stp     x0, x1, [sp, #-0x10]!
     bl      rt_hw_trap_fiq
-    ldp     x0, x1, [sp], #0x10
     RESTORE_IRQ_CONTEXT
 
 .globl      rt_interrupt_enter
@@ -36,19 +34,17 @@ vector_fiq:
 .globl      rt_thread_switch_interrupt_flag
 .globl      rt_interrupt_from_thread
 .globl      rt_interrupt_to_thread
+.globl      rt_hw_context_switch_interrupt_do
 
     .align  8
     .globl vector_irq
 vector_irq:
     SAVE_IRQ_CONTEXT
-    stp     x0, x1, [sp, #-0x10]!   /* X0 is thread sp */
 
     bl      rt_interrupt_enter
     bl      rt_hw_trap_irq
     bl      rt_interrupt_leave
 
-    ldp     x0, x1, [sp], #0x10
-
     /**
      * if rt_thread_switch_interrupt_flag set, jump to
      * rt_hw_context_switch_interrupt_do and don't return
@@ -61,15 +57,7 @@ vector_irq:
     mov     x2,  #0         // clear flag
     str     x2,  [x1]
 
-    ldr     x3,  =rt_interrupt_from_thread
-    ldr     x4,  [x3]
-    str     x0,  [x4]       // store sp in preempted tasks's tcb
-
-    ldr     x3,  =rt_interrupt_to_thread
-    ldr     x4,  [x3]
-    ldr     x0,  [x4]       // get new task's stack pointer
-
-    RESTORE_IRQ_CONTEXT
+    bl      rt_hw_context_switch_interrupt_do
 
 vector_irq_exit:
     RESTORE_IRQ_CONTEXT_WITHOUT_MMU_SWITCH
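
Control flow at the IRQ tail, for clarity (a sketch; when the flag is set, the bl never returns here because the preempted thread is resumed later via eret):

    extern volatile unsigned long rt_thread_switch_interrupt_flag;
    extern void rt_hw_context_switch_interrupt_do(void);

    static void vector_irq_tail_model(void)
    {
        if (rt_thread_switch_interrupt_flag == 1)
        {
            rt_thread_switch_interrupt_flag = 0;  /* clear flag */
            rt_hw_context_switch_interrupt_do();  /* does not return */
        }
        /* otherwise RESTORE_IRQ_CONTEXT* unwinds the exception normally */
    }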