Browse Source

AArch64: fixup fpu storage's size in stack and append Q16 ~ Q31 (#7815)

Signed-off-by: GuEe-GUI <GuEe-GUI@github.com>
GUI 1 year ago
parent
commit
b424169e17

+ 5 - 105
components/lwp/arch/aarch64/cortex-a/lwp_gcc.S

@@ -8,9 +8,14 @@
  * 2021-05-18     Jesven       first version
  */
 
+#ifndef __ASSEMBLY__
+#define __ASSEMBLY__
+#endif
+
 #include "rtconfig.h"
 #include "asm-generic.h"
 #include "asm-fpu.h"
+#include "armv8.h"
 
 /*********************
  *      SPSR BIT     *
@@ -32,67 +37,6 @@
 #define  SPSR_Z(v)             ((v) << 30)
 #define  SPSR_N(v)             ((v) << 31)
 
-/*********************
- *   CONTEXT_OFFSET  *
- *********************/
-
-#define CONTEXT_OFFSET_ELR_EL1    0x0
-#define CONTEXT_OFFSET_SPSR_EL1   0x8
-#define CONTEXT_OFFSET_SP_EL0     0x10
-#define CONTEXT_OFFSET_X30        0x18
-#define CONTEXT_OFFSET_FPCR       0x20
-#define CONTEXT_OFFSET_FPSR       0x28
-#define CONTEXT_OFFSET_X28        0x30
-#define CONTEXT_OFFSET_X29        0x38
-#define CONTEXT_OFFSET_X26        0x40
-#define CONTEXT_OFFSET_X27        0x48
-#define CONTEXT_OFFSET_X24        0x50
-#define CONTEXT_OFFSET_X25        0x58
-#define CONTEXT_OFFSET_X22        0x60
-#define CONTEXT_OFFSET_X23        0x68
-#define CONTEXT_OFFSET_X20        0x70
-#define CONTEXT_OFFSET_X21        0x78
-#define CONTEXT_OFFSET_X18        0x80
-#define CONTEXT_OFFSET_X19        0x88
-#define CONTEXT_OFFSET_X16        0x90
-#define CONTEXT_OFFSET_X17        0x98
-#define CONTEXT_OFFSET_X14        0xa0
-#define CONTEXT_OFFSET_X15        0xa8
-#define CONTEXT_OFFSET_X12        0xb0
-#define CONTEXT_OFFSET_X13        0xb8
-#define CONTEXT_OFFSET_X10        0xc0
-#define CONTEXT_OFFSET_X11        0xc8
-#define CONTEXT_OFFSET_X8         0xd0
-#define CONTEXT_OFFSET_X9         0xd8
-#define CONTEXT_OFFSET_X6         0xe0
-#define CONTEXT_OFFSET_X7         0xe8
-#define CONTEXT_OFFSET_X4         0xf0
-#define CONTEXT_OFFSET_X5         0xf8
-#define CONTEXT_OFFSET_X2         0x100
-#define CONTEXT_OFFSET_X3         0x108
-#define CONTEXT_OFFSET_X0         0x110
-#define CONTEXT_OFFSET_X1         0x118
-
-#define CONTEXT_OFFSET_Q15        0x120
-#define CONTEXT_OFFSET_Q14        0x130
-#define CONTEXT_OFFSET_Q13        0x140
-#define CONTEXT_OFFSET_Q12        0x150
-#define CONTEXT_OFFSET_Q11        0x160
-#define CONTEXT_OFFSET_Q10        0x170
-#define CONTEXT_OFFSET_Q9         0x180
-#define CONTEXT_OFFSET_Q8         0x190
-#define CONTEXT_OFFSET_Q7         0x1a0
-#define CONTEXT_OFFSET_Q6         0x1b0
-#define CONTEXT_OFFSET_Q5         0x1c0
-#define CONTEXT_OFFSET_Q4         0x1d0
-#define CONTEXT_OFFSET_Q3         0x1e0
-#define CONTEXT_OFFSET_Q2         0x1f0
-#define CONTEXT_OFFSET_Q1         0x200
-#define CONTEXT_OFFSET_Q0         0x210
-
-#define CONTEXT_FPU_SIZE          0x100
-#define CONTEXT_SIZE              0x220
-
 /**************************************************/
 
 .text
@@ -370,50 +314,6 @@ arch_ret_to_user:
 1:
     eret
 
-/*
-struct rt_hw_exp_stack
-{
-    unsigned long pc;     0
-    unsigned long cpsr;
-    unsigned long sp_el0; 0x10
-    unsigned long x30;
-    unsigned long fpcr;   0x20
-    unsigned long fpsr;
-    unsigned long x28;    0x30
-    unsigned long x29;
-    unsigned long x26;    0x40
-    unsigned long x27;
-    unsigned long x24;    0x50
-    unsigned long x25;
-    unsigned long x22;    0x60
-    unsigned long x23;
-    unsigned long x20;    0x70
-    unsigned long x21;
-    unsigned long x18;    0x80
-    unsigned long x19;
-    unsigned long x16;    0x90
-    unsigned long x17;
-    unsigned long x14;    0xa0
-    unsigned long x15;
-    unsigned long x12;    0xb0
-    unsigned long x13;
-    unsigned long x10;    0xc0
-    unsigned long x11;
-    unsigned long x8;     0xd0
-    unsigned long x9;
-    unsigned long x6;     0xe0
-    unsigned long x7;
-    unsigned long x4;     0xf0
-    unsigned long x5;
-    unsigned long x2;     0x100
-    unsigned long x3;
-    unsigned long x0;     0x110
-    unsigned long x1;
-
-    unsigned long long fpu[16]; 0x120
-                                0x220 = 0x120 + 0x10 * 0x10
-};
-*/
 .global lwp_check_debug
 lwp_check_debug:
     ldr x0, =rt_dbg_ops

+ 124 - 37
libcpu/aarch64/common/armv8.h

@@ -11,47 +11,132 @@
 #ifndef __ARMV8_H__
 #define __ARMV8_H__
 
+#ifdef __ASSEMBLY__
+
+/*********************
+ *   CONTEXT_OFFSET  *
+ *********************
+ * Byte offsets of every saved register inside the exception context.
+ * The values follow the member order of struct rt_hw_exp_stack in the
+ * C half of this header: 36 eight-byte slots (0x000 ~ 0x11f) for the
+ * system and general-purpose registers, then 32 sixteen-byte slots for
+ * the Q registers. SAVE_FPU pushes Q0 first and Q31 last with a
+ * pre-decrement store, so Q31 ends up at the lowest offset.
+ */
+
+#define CONTEXT_OFFSET_ELR_EL1    0x0
+#define CONTEXT_OFFSET_SPSR_EL1   0x8
+#define CONTEXT_OFFSET_SP_EL0     0x10
+#define CONTEXT_OFFSET_X30        0x18
+#define CONTEXT_OFFSET_FPCR       0x20
+#define CONTEXT_OFFSET_FPSR       0x28
+#define CONTEXT_OFFSET_X28        0x30
+#define CONTEXT_OFFSET_X29        0x38
+#define CONTEXT_OFFSET_X26        0x40
+#define CONTEXT_OFFSET_X27        0x48
+#define CONTEXT_OFFSET_X24        0x50
+#define CONTEXT_OFFSET_X25        0x58
+#define CONTEXT_OFFSET_X22        0x60
+#define CONTEXT_OFFSET_X23        0x68
+#define CONTEXT_OFFSET_X20        0x70
+#define CONTEXT_OFFSET_X21        0x78
+#define CONTEXT_OFFSET_X18        0x80
+#define CONTEXT_OFFSET_X19        0x88
+#define CONTEXT_OFFSET_X16        0x90
+#define CONTEXT_OFFSET_X17        0x98
+#define CONTEXT_OFFSET_X14        0xa0
+#define CONTEXT_OFFSET_X15        0xa8
+#define CONTEXT_OFFSET_X12        0xb0
+#define CONTEXT_OFFSET_X13        0xb8
+#define CONTEXT_OFFSET_X10        0xc0
+#define CONTEXT_OFFSET_X11        0xc8
+#define CONTEXT_OFFSET_X8         0xd0
+#define CONTEXT_OFFSET_X9         0xd8
+#define CONTEXT_OFFSET_X6         0xe0
+#define CONTEXT_OFFSET_X7         0xe8
+#define CONTEXT_OFFSET_X4         0xf0
+#define CONTEXT_OFFSET_X5         0xf8
+#define CONTEXT_OFFSET_X2         0x100
+#define CONTEXT_OFFSET_X3         0x108
+#define CONTEXT_OFFSET_X0         0x110
+#define CONTEXT_OFFSET_X1         0x118
+
+#define CONTEXT_OFFSET_Q31        0x120 /* first FPU slot, directly after X1 (0x118 + 8) */
+#define CONTEXT_OFFSET_Q30        0x130
+#define CONTEXT_OFFSET_Q29        0x140
+#define CONTEXT_OFFSET_Q28        0x150
+#define CONTEXT_OFFSET_Q27        0x160
+#define CONTEXT_OFFSET_Q26        0x170
+#define CONTEXT_OFFSET_Q25        0x180
+#define CONTEXT_OFFSET_Q24        0x190
+#define CONTEXT_OFFSET_Q23        0x1a0
+#define CONTEXT_OFFSET_Q22        0x1b0
+#define CONTEXT_OFFSET_Q21        0x1c0
+#define CONTEXT_OFFSET_Q20        0x1d0
+#define CONTEXT_OFFSET_Q19        0x1e0
+#define CONTEXT_OFFSET_Q18        0x1f0
+#define CONTEXT_OFFSET_Q17        0x200
+#define CONTEXT_OFFSET_Q16        0x210
+#define CONTEXT_OFFSET_Q15        0x220
+#define CONTEXT_OFFSET_Q14        0x230
+#define CONTEXT_OFFSET_Q13        0x240
+#define CONTEXT_OFFSET_Q12        0x250
+#define CONTEXT_OFFSET_Q11        0x260
+#define CONTEXT_OFFSET_Q10        0x270
+#define CONTEXT_OFFSET_Q9         0x280
+#define CONTEXT_OFFSET_Q8         0x290
+#define CONTEXT_OFFSET_Q7         0x2a0
+#define CONTEXT_OFFSET_Q6         0x2b0
+#define CONTEXT_OFFSET_Q5         0x2c0
+#define CONTEXT_OFFSET_Q4         0x2d0
+#define CONTEXT_OFFSET_Q3         0x2e0
+#define CONTEXT_OFFSET_Q2         0x2f0
+#define CONTEXT_OFFSET_Q1         0x300
+#define CONTEXT_OFFSET_Q0         0x310
+
+#define CONTEXT_FPU_SIZE          (32 * 16) /* Q0 ~ Q31, 16 bytes each */
+#define CONTEXT_SIZE              (0x120 + CONTEXT_FPU_SIZE) /* 0x120 bytes of core registers plus the FPU area */
+
+#else /* !__ASSEMBLY__ */
+
+#include <rtdef.h>
+
+typedef struct { rt_uint64_t value[2]; } rt_uint128_t;
+
 /* the exception stack frame: core registers, fpcr/fpsr and the FPU registers Q0 ~ Q31 */
 struct rt_hw_exp_stack
 {
-    unsigned long pc;
-    unsigned long cpsr;
-    unsigned long sp_el0;
-    unsigned long x30;
-    unsigned long fpcr;
-    unsigned long fpsr;
-    unsigned long x28;
-    unsigned long x29;
-    unsigned long x26;
-    unsigned long x27;
-    unsigned long x24;
-    unsigned long x25;
-    unsigned long x22;
-    unsigned long x23;
-    unsigned long x20;
-    unsigned long x21;
-    unsigned long x18;
-    unsigned long x19;
-    unsigned long x16;
-    unsigned long x17;
-    unsigned long x14;
-    unsigned long x15;
-    unsigned long x12;
-    unsigned long x13;
-    unsigned long x10;
-    unsigned long x11;
-    unsigned long x8;
-    unsigned long x9;
-    unsigned long x6;
-    unsigned long x7;
-    unsigned long x4;
-    unsigned long x5;
-    unsigned long x2;
-    unsigned long x3;
-    unsigned long x0;
-    unsigned long x1;
+    rt_uint64_t pc;
+    rt_uint64_t cpsr;
+    rt_uint64_t sp_el0;
+    rt_uint64_t x30;
+    rt_uint64_t fpcr;
+    rt_uint64_t fpsr;
+    rt_uint64_t x28;
+    rt_uint64_t x29;
+    rt_uint64_t x26;
+    rt_uint64_t x27;
+    rt_uint64_t x24;
+    rt_uint64_t x25;
+    rt_uint64_t x22;
+    rt_uint64_t x23;
+    rt_uint64_t x20;
+    rt_uint64_t x21;
+    rt_uint64_t x18;
+    rt_uint64_t x19;
+    rt_uint64_t x16;
+    rt_uint64_t x17;
+    rt_uint64_t x14;
+    rt_uint64_t x15;
+    rt_uint64_t x12;
+    rt_uint64_t x13;
+    rt_uint64_t x10;
+    rt_uint64_t x11;
+    rt_uint64_t x8;
+    rt_uint64_t x9;
+    rt_uint64_t x6;
+    rt_uint64_t x7;
+    rt_uint64_t x4;
+    rt_uint64_t x5;
+    rt_uint64_t x2;
+    rt_uint64_t x3;
+    rt_uint64_t x0;
+    rt_uint64_t x1;
 
-    unsigned long long fpu[16];
+    rt_uint128_t fpu[32]; /* starts at offset 0x120; fpu[0] holds Q31 and fpu[31] holds Q0, matching CONTEXT_OFFSET_Q31 ~ CONTEXT_OFFSET_Q0 */
 };
 
 #define SP_ELx     ((unsigned long)0x01)
@@ -64,4 +149,6 @@ rt_ubase_t rt_hw_get_current_el(void);
 void rt_hw_set_elx_env(void);
 void rt_hw_set_current_vbar(rt_ubase_t addr);
 
+#endif /* __ASSEMBLY__ */
+
 #endif

+ 34 - 1
libcpu/aarch64/common/asm-fpu.h

@@ -1,11 +1,12 @@
 /*
- * Copyright (c) 2006-2021, RT-Thread Development Team
+ * Copyright (c) 2006-2023, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Change Logs:
  * Date           Author       Notes
  * 2021-05-18     Jesven       the first version
+ * 2023-07-13     GuEe-GUI     append Q16 ~ Q31
  */
 
 .macro SAVE_FPU, reg
@@ -25,8 +26,40 @@
     STR Q13, [\reg, #-0x10]!
     STR Q14, [\reg, #-0x10]!
     STR Q15, [\reg, #-0x10]!
+    STR Q16, [\reg, #-0x10]!
+    STR Q17, [\reg, #-0x10]!
+    STR Q18, [\reg, #-0x10]!
+    STR Q19, [\reg, #-0x10]!
+    STR Q20, [\reg, #-0x10]!
+    STR Q21, [\reg, #-0x10]!
+    STR Q22, [\reg, #-0x10]!
+    STR Q23, [\reg, #-0x10]!
+    STR Q24, [\reg, #-0x10]!
+    STR Q25, [\reg, #-0x10]!
+    STR Q26, [\reg, #-0x10]!
+    STR Q27, [\reg, #-0x10]!
+    STR Q28, [\reg, #-0x10]!
+    STR Q29, [\reg, #-0x10]!
+    STR Q30, [\reg, #-0x10]!
+    STR Q31, [\reg, #-0x10]!
 .endm
 .macro RESTORE_FPU, reg
+    LDR Q31, [\reg], #0x10
+    LDR Q30, [\reg], #0x10
+    LDR Q29, [\reg], #0x10
+    LDR Q28, [\reg], #0x10
+    LDR Q27, [\reg], #0x10
+    LDR Q26, [\reg], #0x10
+    LDR Q25, [\reg], #0x10
+    LDR Q24, [\reg], #0x10
+    LDR Q23, [\reg], #0x10
+    LDR Q22, [\reg], #0x10
+    LDR Q21, [\reg], #0x10
+    LDR Q20, [\reg], #0x10
+    LDR Q19, [\reg], #0x10
+    LDR Q18, [\reg], #0x10
+    LDR Q17, [\reg], #0x10
+    LDR Q16, [\reg], #0x10
     LDR Q15, [\reg], #0x10
     LDR Q14, [\reg], #0x10
     LDR Q13, [\reg], #0x10

+ 8 - 33
libcpu/aarch64/common/stack.c

@@ -1,11 +1,12 @@
 /*
- * Copyright (c) 2006-2021, RT-Thread Development Team
+ * Copyright (c) 2006-2023, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Change Logs:
  * Date           Author       Notes
  * 2021-05-12     RT-Thread    init
+ * 2023-07-13     GuEe-GUI     append fpu: Q16 ~ Q31
  */
 #include <board.h>
 #include <rtthread.h>
@@ -32,38 +33,12 @@ rt_uint8_t *rt_hw_stack_init(void *tentry, void *parameter,
 
     stk = (rt_ubase_t *)stack_addr;
 
-    *(--stk) = (rt_ubase_t)0; /* Q0 */
-    *(--stk) = (rt_ubase_t)0; /* Q0 */
-    *(--stk) = (rt_ubase_t)0; /* Q1 */
-    *(--stk) = (rt_ubase_t)0; /* Q1 */
-    *(--stk) = (rt_ubase_t)0; /* Q2 */
-    *(--stk) = (rt_ubase_t)0; /* Q2 */
-    *(--stk) = (rt_ubase_t)0; /* Q3 */
-    *(--stk) = (rt_ubase_t)0; /* Q3 */
-    *(--stk) = (rt_ubase_t)0; /* Q4 */
-    *(--stk) = (rt_ubase_t)0; /* Q4 */
-    *(--stk) = (rt_ubase_t)0; /* Q5 */
-    *(--stk) = (rt_ubase_t)0; /* Q5 */
-    *(--stk) = (rt_ubase_t)0; /* Q6 */
-    *(--stk) = (rt_ubase_t)0; /* Q6 */
-    *(--stk) = (rt_ubase_t)0; /* Q7 */
-    *(--stk) = (rt_ubase_t)0; /* Q7 */
-    *(--stk) = (rt_ubase_t)0; /* Q8 */
-    *(--stk) = (rt_ubase_t)0; /* Q8 */
-    *(--stk) = (rt_ubase_t)0; /* Q9 */
-    *(--stk) = (rt_ubase_t)0; /* Q9 */
-    *(--stk) = (rt_ubase_t)0; /* Q10 */
-    *(--stk) = (rt_ubase_t)0; /* Q10 */
-    *(--stk) = (rt_ubase_t)0; /* Q11 */
-    *(--stk) = (rt_ubase_t)0; /* Q11 */
-    *(--stk) = (rt_ubase_t)0; /* Q12 */
-    *(--stk) = (rt_ubase_t)0; /* Q12 */
-    *(--stk) = (rt_ubase_t)0; /* Q13 */
-    *(--stk) = (rt_ubase_t)0; /* Q13 */
-    *(--stk) = (rt_ubase_t)0; /* Q14 */
-    *(--stk) = (rt_ubase_t)0; /* Q14 */
-    *(--stk) = (rt_ubase_t)0; /* Q15 */
-    *(--stk) = (rt_ubase_t)0; /* Q15 */
+    for (int i = 0; i < 32; ++i)
+    {
+        stk -= sizeof(rt_uint128_t) / sizeof(rt_ubase_t);
+
+        *(rt_uint128_t *)stk = (rt_uint128_t) { 0 };
+    }
 
     *(--stk) = (rt_ubase_t)0;           /* X1 */
     *(--stk) = (rt_ubase_t)parameter;   /* X0 */