Просмотр исходного кода

[bsp][riscv64] add fpu support

jasonhu 4 года назад
Родитель
Коммит
22457fdad0

+ 2 - 0
bsp/qemu-virt64-riscv/.config

@@ -467,6 +467,7 @@ CONFIG_RT_LWP_SHM_MAX_NR=64
 # CONFIG_PKG_USING_QBOOT is not set
 # CONFIG_PKG_USING_PPOOL is not set
 # CONFIG_PKG_USING_OPENAMP is not set
+# CONFIG_PKG_USING_RT_VSNPRINTF_FULL is not set
 # CONFIG_PKG_USING_RT_KPRINTF_THREADSAFE is not set
 # CONFIG_PKG_USING_LPM is not set
 # CONFIG_PKG_USING_TLSF is not set
@@ -620,6 +621,7 @@ CONFIG_RT_LWP_SHM_MAX_NR=64
 # CONFIG_PKG_USING_COWSAY is not set
 # CONFIG_PKG_USING_TERMBOX is not set
 CONFIG_BOARD_virt=y
+CONFIG_ENABLE_FPU=y
 
 #
 # General Purpose UARTs

+ 4 - 0
bsp/qemu-virt64-riscv/Kconfig

@@ -31,6 +31,10 @@ config RT_USING_USERSPACE
     bool
     default y
 
+config ENABLE_FPU
+    bool "Enable FPU"
+    default y
+    help
+        Save and restore the floating-point registers f0-f31 as part of the
+        thread context and create new threads with the sstatus.FS field
+        enabled. When disabled, the context frame has no FPU area.
+
 source "driver/Kconfig"
 
 config __STACKSIZE__

+ 1 - 0
bsp/qemu-virt64-riscv/rtconfig.h

@@ -192,6 +192,7 @@
 /* entertainment: terminal games and other interesting software packages */
 
 #define BOARD_virt
+#define ENABLE_FPU
 
 /* General Purpose UARTs */
 

+ 1 - 1
bsp/qemu-virt64-riscv/rtconfig.py

@@ -37,7 +37,7 @@ if PLATFORM == 'gcc':
     OBJDUMP = PREFIX + 'objdump'
     OBJCPY  = PREFIX + 'objcopy'
 
-    DEVICE  = ' -mcmodel=medany -march=rv64imac -mabi=lp64'
+    DEVICE  = ' -mcmodel=medany -march=rv64imafdc -mabi=lp64'
     CFLAGS  = DEVICE + ' -fvar-tracking -ffreestanding -fno-common -ffunction-sections -fdata-sections -fstrict-volatile-bitfields'
     AFLAGS  = ' -c' + DEVICE + ' -x assembler-with-cpp'
     LFLAGS  = DEVICE + ' -nostartfiles -Wl,--gc-sections,-Map=rtthread.map,-cref,-u,_start -T link.lds'

+ 4 - 0
libcpu/risc-v/virt64/cpuport.c

@@ -73,7 +73,11 @@ rt_uint8_t *rt_hw_stack_init(void       *tentry,
     frame->user_sp_exc_stack = (rt_ubase_t)(((rt_ubase_t)stk) + sizeof(struct rt_hw_stack_frame));
 
     /* force to supervisor mode(SPP=1) and set SPIE and SUM to 1 */
+#ifdef ENABLE_FPU
+    frame->sstatus = 0x00046120;    /* enable FPU */
+#else
     frame->sstatus = 0x00040120;
+#endif
 
     return stk;
 }

+ 12 - 2
libcpu/risc-v/virt64/cpuport.h

@@ -22,8 +22,18 @@
 // error here, not portable
 #endif
 
-/* 33 normal register */
-#define CTX_REG_NR  33
+/*
+ * Number of REGBYTES-sized slots in the general part of a saved context,
+ * as laid out by SAVE_ALL/RESTORE_ALL: slot 0 holds sepc, slot 1 holds x1,
+ * slot 2 holds sstatus, slots 3..31 hold x3..x31 and slot 32 holds the
+ * user sp / exception stack value (sscratch).
+ */
+#define CTX_GENERAL_REG_NR  33
+
+#ifdef ENABLE_FPU
+/* one slot per floating-point register f0..f31 */
+#define CTX_FPU_REG_NR  32
+#else
+/* FPU support disabled: the context frame has no FPU area */
+#define CTX_FPU_REG_NR  0
+#endif
+
+/* total number of slots in a saved context (general area + FPU area) */
+#define CTX_REG_NR  (CTX_GENERAL_REG_NR + CTX_FPU_REG_NR)
 
 #ifndef __ASSEMBLY__
 rt_inline void rt_hw_dsb()

+ 4 - 1
bsp/qemu-virt64-riscv/driver/encoding.h → libcpu/risc-v/virt64/encoding.h

@@ -28,7 +28,10 @@
 #define SSTATUS_UPIE        0x00000010
 #define SSTATUS_SPIE        0x00000020
 #define SSTATUS_SPP         0x00000100
-#define SSTATUS_FS          0x00006000
+#define SSTATUS_FS          0x00006000 /* Floating-point Status field mask (bits 14:13); 0 = Off */
+#define SSTATUS_FS_INITIAL  0x00002000 /* FS = 1: Initial */
+#define SSTATUS_FS_CLEAN    0x00004000 /* FS = 2: Clean */
+#define SSTATUS_FS_DIRTY    0x00006000 /* FS = 3: Dirty */
 #define SSTATUS_XS          0x00018000
 #define SSTATUS_PUM         0x00040000
 #define SSTATUS32_SD        0x80000000

+ 0 - 0
bsp/qemu-virt64-riscv/driver/io.h → libcpu/risc-v/virt64/io.h


+ 0 - 0
bsp/qemu-virt64-riscv/driver/plic.c → libcpu/risc-v/virt64/plic.c


+ 0 - 0
bsp/qemu-virt64-riscv/driver/plic.h → libcpu/risc-v/virt64/plic.h


+ 0 - 0
bsp/qemu-virt64-riscv/driver/sbi.c → libcpu/risc-v/virt64/sbi.c


+ 0 - 0
bsp/qemu-virt64-riscv/driver/sbi.h → libcpu/risc-v/virt64/sbi.h


+ 3 - 0
libcpu/risc-v/virt64/stack.h

@@ -47,6 +47,9 @@ struct rt_hw_stack_frame
     rt_ubase_t t5;         /* x30 - t5     - temporary register 5                */
     rt_ubase_t t6;         /* x31 - t6     - temporary register 6                */
     rt_ubase_t user_sp_exc_stack;    /* sscratch - user mode sp/exception stack  */
+#ifdef ENABLE_FPU
+    rt_ubase_t f[CTX_FPU_REG_NR];      /* f0~f31 */
+#endif
 };
 
 #endif

+ 139 - 47
libcpu/risc-v/virt64/stackframe.h

@@ -7,16 +7,101 @@
  * Date           Author       Notes
  * 2021-02-02     lizhirui     first version
  * 2021-02-11     lizhirui     fixed gp save/store bug
+ * 2021-11-18     JasonHu      add fpu registers save/restore
  */
 
 #ifndef __STACKFRAME_H__
 #define __STACKFRAME_H__
 
 #include "cpuport.h"
+#include "encoding.h"
+
+#ifdef ENABLE_FPU
+/*
+ * Byte offsets of the f0..f31 slots inside the FPU save area, relative to
+ * the address of the f0 slot. SAVE_ALL/RESTORE_ALL use them as fsd/fld
+ * displacements. Every slot is 8 bytes wide, so FPU_CTX_Fn_OFF == n * 8.
+ */
+#define FPU_CTX_F0_OFF   0   /* f0  */
+#define FPU_CTX_F1_OFF   8   /* f1  */
+#define FPU_CTX_F2_OFF   16  /* f2  */
+#define FPU_CTX_F3_OFF   24  /* f3  */
+#define FPU_CTX_F4_OFF   32  /* f4  */
+#define FPU_CTX_F5_OFF   40  /* f5  */
+#define FPU_CTX_F6_OFF   48  /* f6  */
+#define FPU_CTX_F7_OFF   56  /* f7  */
+#define FPU_CTX_F8_OFF   64  /* f8  */
+#define FPU_CTX_F9_OFF   72  /* f9  */
+#define FPU_CTX_F10_OFF  80  /* f10 */
+#define FPU_CTX_F11_OFF  88  /* f11 */
+#define FPU_CTX_F12_OFF  96  /* f12 */
+#define FPU_CTX_F13_OFF  104 /* f13 */
+#define FPU_CTX_F14_OFF  112 /* f14 */
+#define FPU_CTX_F15_OFF  120 /* f15 */
+#define FPU_CTX_F16_OFF  128 /* f16 */
+#define FPU_CTX_F17_OFF  136 /* f17 */
+#define FPU_CTX_F18_OFF  144 /* f18 */
+#define FPU_CTX_F19_OFF  152 /* f19 */
+#define FPU_CTX_F20_OFF  160 /* f20 */
+#define FPU_CTX_F21_OFF  168 /* f21 */
+#define FPU_CTX_F22_OFF  176 /* f22 */
+#define FPU_CTX_F23_OFF  184 /* f23 */
+#define FPU_CTX_F24_OFF  192 /* f24 */
+#define FPU_CTX_F25_OFF  200 /* f25 */
+#define FPU_CTX_F26_OFF  208 /* f26 */
+#define FPU_CTX_F27_OFF  216 /* f27 */
+#define FPU_CTX_F28_OFF  224 /* f28 */
+#define FPU_CTX_F29_OFF  232 /* f29 */
+#define FPU_CTX_F30_OFF  240 /* f30 */
+#define FPU_CTX_F31_OFF  248 /* f31 */
+#endif /* ENABLE_FPU */
 
 .macro SAVE_ALL
-    addi sp, sp, -CTX_REG_NR * REGBYTES
-
+#ifdef ENABLE_FPU
+    /*
+     * Save f0-f31 into the FPU area, which sits above the general-register
+     * area of the context frame.
+     *
+     * No general register has been saved yet at this point, so the scratch
+     * register needed to program sstatus.FS must not lose the interrupted
+     * context's value. t0 is therefore parked in the f31 slot while it is
+     * used as scratch, and f31 is written to that slot last.
+     */
+    addi sp, sp, -CTX_FPU_REG_NR * REGBYTES
+
+    /* park the interrupted t0 in the not-yet-used f31 slot */
+    STORE t0, FPU_CTX_F31_OFF(sp)
+
+    /* make sure FS != Off so the FP stores below do not trap */
+    li  t0, SSTATUS_FS
+    csrs sstatus, t0
+    fsd f0,  FPU_CTX_F0_OFF(sp)
+    fsd f1,  FPU_CTX_F1_OFF(sp)
+    fsd f2,  FPU_CTX_F2_OFF(sp)
+    fsd f3,  FPU_CTX_F3_OFF(sp)
+    fsd f4,  FPU_CTX_F4_OFF(sp)
+    fsd f5,  FPU_CTX_F5_OFF(sp)
+    fsd f6,  FPU_CTX_F6_OFF(sp)
+    fsd f7,  FPU_CTX_F7_OFF(sp)
+    fsd f8,  FPU_CTX_F8_OFF(sp)
+    fsd f9,  FPU_CTX_F9_OFF(sp)
+    fsd f10, FPU_CTX_F10_OFF(sp)
+    fsd f11, FPU_CTX_F11_OFF(sp)
+    fsd f12, FPU_CTX_F12_OFF(sp)
+    fsd f13, FPU_CTX_F13_OFF(sp)
+    fsd f14, FPU_CTX_F14_OFF(sp)
+    fsd f15, FPU_CTX_F15_OFF(sp)
+    fsd f16, FPU_CTX_F16_OFF(sp)
+    fsd f17, FPU_CTX_F17_OFF(sp)
+    fsd f18, FPU_CTX_F18_OFF(sp)
+    fsd f19, FPU_CTX_F19_OFF(sp)
+    fsd f20, FPU_CTX_F20_OFF(sp)
+    fsd f21, FPU_CTX_F21_OFF(sp)
+    fsd f22, FPU_CTX_F22_OFF(sp)
+    fsd f23, FPU_CTX_F23_OFF(sp)
+    fsd f24, FPU_CTX_F24_OFF(sp)
+    fsd f25, FPU_CTX_F25_OFF(sp)
+    fsd f26, FPU_CTX_F26_OFF(sp)
+    fsd f27, FPU_CTX_F27_OFF(sp)
+    fsd f28, FPU_CTX_F28_OFF(sp)
+    fsd f29, FPU_CTX_F29_OFF(sp)
+    fsd f30, FPU_CTX_F30_OFF(sp)
+
+    /* clear the whole FS field (t0 still holds SSTATUS_FS) */
+    csrc sstatus, t0
+
+    /* set FS = Clean; FS is no longer Dirty, so it no longer forces sstatus.SD */
+    li t0, SSTATUS_FS_CLEAN
+    csrs sstatus, t0
+
+    /*
+     * t0 is no longer needed as scratch: recover the interrupted value, then
+     * fill the f31 slot. FS is Clean (not Off), so this fsd is still legal.
+     */
+    LOAD t0, FPU_CTX_F31_OFF(sp)
+    fsd f31, FPU_CTX_F31_OFF(sp)
+
+#endif /* ENABLE_FPU */
+
+    /* save general registers */
+    addi sp, sp, -CTX_GENERAL_REG_NR * REGBYTES
     STORE x1,   1 * REGBYTES(sp)
 
     csrr  x1, sstatus
@@ -58,51 +143,10 @@
     STORE t0, 32 * REGBYTES(sp)
 .endm
 
-.macro RESTORE_ALL_ONLY
-        /* resw ra to sepc */
-    LOAD x1,   0 * REGBYTES(sp)
-    csrw sepc, x1
-
-    LOAD x1,   2 * REGBYTES(sp)
-    csrw sstatus, x1
-
-    LOAD x1,   1 * REGBYTES(sp)
-
-    LOAD x3,   3 * REGBYTES(sp)
-    LOAD x4,   4 * REGBYTES(sp)
-    LOAD x5,   5 * REGBYTES(sp)
-    LOAD x6,   6 * REGBYTES(sp)
-    LOAD x7,   7 * REGBYTES(sp)
-    LOAD x8,   8 * REGBYTES(sp)
-    LOAD x9,   9 * REGBYTES(sp)
-    LOAD x10, 10 * REGBYTES(sp)
-    LOAD x11, 11 * REGBYTES(sp)
-    LOAD x12, 12 * REGBYTES(sp)
-    LOAD x13, 13 * REGBYTES(sp)
-    LOAD x14, 14 * REGBYTES(sp)
-    LOAD x15, 15 * REGBYTES(sp)
-    LOAD x16, 16 * REGBYTES(sp)
-    LOAD x17, 17 * REGBYTES(sp)
-    LOAD x18, 18 * REGBYTES(sp)
-    LOAD x19, 19 * REGBYTES(sp)
-    LOAD x20, 20 * REGBYTES(sp)
-    LOAD x21, 21 * REGBYTES(sp)
-    LOAD x22, 22 * REGBYTES(sp)
-    LOAD x23, 23 * REGBYTES(sp)
-    LOAD x24, 24 * REGBYTES(sp)
-    LOAD x25, 25 * REGBYTES(sp)
-    LOAD x26, 26 * REGBYTES(sp)
-    LOAD x27, 27 * REGBYTES(sp)
-    LOAD x28, 28 * REGBYTES(sp)
-    LOAD x29, 29 * REGBYTES(sp)
-    LOAD x30, 30 * REGBYTES(sp)
-    LOAD x31, 31 * REGBYTES(sp)
-
-    addi sp,  sp, CTX_REG_NR * REGBYTES
-.endm
-
 .macro RESTORE_ALL
-        /* resw ra to sepc */
+    /* restore general registers */
+
+    /* reload sepc from frame slot 0, using x1 (ra) as scratch */
     LOAD x1,   0 * REGBYTES(sp)
     csrw sepc, x1
 
@@ -141,7 +185,55 @@
     LOAD x30, 30 * REGBYTES(sp)
     LOAD x31, 31 * REGBYTES(sp)
 
-    //restore user sp
+#ifdef ENABLE_FPU
+    /*
+     * Restore f0-f31 from the FPU area located right above the general
+     * registers in the context frame.
+     *
+     * This runs after the general registers have been reloaded, and it uses
+     * t0 (x5) and t2 (x7) as scratch, so both are reloaded from the frame
+     * once the FPU state is back.
+     */
+    mv t2, sp
+    addi t2, t2, CTX_GENERAL_REG_NR * REGBYTES   /* t2 = base of the FPU area */
+
+    /* make sure FS != Off so the FP loads below do not trap */
+    li  t0, SSTATUS_FS
+    csrs sstatus, t0
+    fld f0, FPU_CTX_F0_OFF(t2)
+    fld f1, FPU_CTX_F1_OFF(t2)
+    fld f2, FPU_CTX_F2_OFF(t2)
+    fld f3, FPU_CTX_F3_OFF(t2)
+    fld f4, FPU_CTX_F4_OFF(t2)
+    fld f5, FPU_CTX_F5_OFF(t2)
+    fld f6, FPU_CTX_F6_OFF(t2)
+    fld f7, FPU_CTX_F7_OFF(t2)
+    fld f8, FPU_CTX_F8_OFF(t2)
+    fld f9, FPU_CTX_F9_OFF(t2)
+    fld f10,FPU_CTX_F10_OFF(t2)
+    fld f11,FPU_CTX_F11_OFF(t2)
+    fld f12,FPU_CTX_F12_OFF(t2)
+    fld f13,FPU_CTX_F13_OFF(t2)
+    fld f14,FPU_CTX_F14_OFF(t2)
+    fld f15,FPU_CTX_F15_OFF(t2)
+    fld f16,FPU_CTX_F16_OFF(t2)
+    fld f17,FPU_CTX_F17_OFF(t2)
+    fld f18,FPU_CTX_F18_OFF(t2)
+    fld f19,FPU_CTX_F19_OFF(t2)
+    fld f20,FPU_CTX_F20_OFF(t2)
+    fld f21,FPU_CTX_F21_OFF(t2)
+    fld f22,FPU_CTX_F22_OFF(t2)
+    fld f23,FPU_CTX_F23_OFF(t2)
+    fld f24,FPU_CTX_F24_OFF(t2)
+    fld f25,FPU_CTX_F25_OFF(t2)
+    fld f26,FPU_CTX_F26_OFF(t2)
+    fld f27,FPU_CTX_F27_OFF(t2)
+    fld f28,FPU_CTX_F28_OFF(t2)
+    fld f29,FPU_CTX_F29_OFF(t2)
+    fld f30,FPU_CTX_F30_OFF(t2)
+    fld f31,FPU_CTX_F31_OFF(t2)
+
+    /* clear the whole FS field (t0 still holds SSTATUS_FS) */
+    csrc sstatus, t0
+
+    /* set FS = Clean; FS is no longer Dirty, so it no longer forces sstatus.SD */
+    li t0, SSTATUS_FS_CLEAN
+    csrs sstatus, t0
+
+    /* t0/t2 were clobbered above: reload the context's x5 and x7 */
+    LOAD x5,   5 * REGBYTES(sp)
+    LOAD x7,   7 * REGBYTES(sp)
+
+#endif /* ENABLE_FPU */
+    /* restore user sp */
     LOAD sp, 32 * REGBYTES(sp)
 .endm