Przeglądaj źródła

feat: libcpu/risc-v: unify context on c906, virt64

Changes:
- added config for NEW_CTX_SWITCH
- used new context on c906 sched switch routine
- update _rt_hw_stack_init to fit into NEW_CTX_SWITCH
- separated vector ctx from the generic

Signed-off-by: Shell <smokewood@qq.com>
Shell 8 miesięcy temu
rodzic
commit
7528645f59

+ 1 - 0
bsp/qemu-virt64-riscv/Kconfig

@@ -28,6 +28,7 @@ config ENABLE_FPU
 
 config ENABLE_VECTOR
     bool "Using RISC-V Vector Extension"
+    select ARCH_RISCV_VECTOR
     default n
 
 if ENABLE_VECTOR

+ 8 - 0
libcpu/Kconfig

@@ -245,6 +245,9 @@ config ARCH_RISCV
 config ARCH_RISCV_FPU
     bool
 
+config ARCH_RISCV_VECTOR
+    bool
+
 config ARCH_RISCV_FPU_S
     select ARCH_RISCV_FPU
     bool
@@ -263,10 +266,15 @@ config ARCH_RISCV64
     bool
 
 if ARCH_RISCV64
+    config ARCH_USING_NEW_CTX_SWITCH
+        bool
+        default y
+
     config ARCH_USING_RISCV_COMMON64
         bool
         depends on ARCH_RISCV64
         select RT_USING_CPUTIME
+        select ARCH_USING_NEW_CTX_SWITCH
         help
             Using the common64 implementation under ./libcpu/risc-v
 endif

+ 88 - 98
libcpu/risc-v/t-head/c906/backtrace.c

@@ -1,137 +1,127 @@
 /*
- * Copyright (c) 2006-2018, RT-Thread Development Team
+ * Copyright (c) 2006-2023, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Change Logs:
  * Date           Author       Notes
+ * 2023-10-18     Shell        Add backtrace support
  */
 
-#include <rtthread.h>
-#ifdef RT_USING_SMART
-#include <lwp_arch.h>
-
-#define TRANCE_LEVEL 20
+#define DBG_TAG "hw.backtrace"
+#define DBG_LVL DBG_INFO
+#include <rtdbg.h>
 
-extern rt_ubase_t __text_start[];
-extern rt_ubase_t __text_end[];
+#include <rtthread.h>
+#include <mm_aspace.h>
+#include "riscv_mmu.h"
+#include "stack.h"
 
-static char *_get_elf_name(size_t sepc);
+#define WORD                            sizeof(rt_base_t)
+#define ARCH_CONTEXT_FETCH(pctx, id)    (*(((unsigned long *)pctx) + (id)))
 
-void rt_hw_backtrace(rt_uint32_t *ffp, rt_ubase_t sepc)
+rt_inline rt_err_t _bt_kaddr(rt_ubase_t *fp, struct rt_hw_backtrace_frame *frame)
 {
-    rt_ubase_t *ra;
-    rt_ubase_t *fp;
-    rt_ubase_t vas, vae;
-    int i, j;
+    rt_err_t rc;
+    frame->fp = *(fp - 2);
+    frame->pc = *(fp - 1);
 
-    rt_kprintf("riscv64-unknown-linux-musl-addr2line -e %s -a -f", _get_elf_name(sepc));
+    if ((rt_ubase_t)fp == frame->fp)
+    {
+        rc = -RT_ERROR;
+    }
+    else
+    {
+        rc = RT_EOK;
+    }
+    return rc;
+}
 
-    fp = (rt_ubase_t *)ffp;
+#ifdef RT_USING_SMART
+#include <lwp_arch.h>
+#include <lwp_user_mm.h>
 
-    if (!fp)
+rt_inline rt_err_t _bt_uaddr(rt_lwp_t lwp, rt_ubase_t *fp, struct rt_hw_backtrace_frame *frame)
+{
+    rt_err_t rc;
+    if (lwp_data_get(lwp, &frame->fp, fp - 2, WORD) != WORD)
     {
-        asm volatile("mv %0, s0"
-                     : "=r"(fp));
+        rc = -RT_EFAULT;
     }
-
-    if (sepc)
+    else if (lwp_data_get(lwp, &frame->pc, fp - 1, WORD) != WORD)
     {
-        rt_kprintf(" %p", sepc - 0x4);
+        rc = -RT_EFAULT;
     }
-
-    if (fp > (rt_ubase_t *)USER_VADDR_START && fp < (rt_ubase_t *)USER_VADDR_TOP)
+    else if ((rt_ubase_t)fp == frame->fp)
     {
-        vas = USER_VADDR_START;
-        vae = USER_VADDR_TOP;
+        rc = -RT_ERROR;
     }
     else
     {
-        vas = (rt_ubase_t)&__text_start;
-        vae = (rt_ubase_t)&__text_end;
+        frame->pc -= 0;
+        rc = RT_EOK;
     }
+    return rc;
+}
+#endif /* RT_USING_SMART */
 
-    for (i = j = 0; i < TRANCE_LEVEL; i++)
+rt_err_t rt_hw_backtrace_frame_unwind(rt_thread_t thread, struct rt_hw_backtrace_frame *frame)
+{
+    rt_err_t rc = -RT_ERROR;
+    rt_uintptr_t *fp = (rt_uintptr_t *)frame->fp;
+
+    if (fp && !((long)fp & 0x7))
     {
-        if (RT_ALIGN((rt_ubase_t)fp, sizeof(void *)) != (rt_ubase_t)fp)
+#ifdef RT_USING_SMART
+        if (thread->lwp)
         {
-            break;
+            void *lwp = thread->lwp;
+            void *this_lwp = lwp_self();
+            if (this_lwp == lwp && rt_hw_mmu_v2p(((rt_lwp_t)lwp)->aspace, fp) != ARCH_MAP_FAILED)
+            {
+                rc = _bt_kaddr(fp, frame);
+            }
+            else if (lwp_user_accessible_ext(lwp, (void *)fp, WORD))
+            {
+                rc = _bt_uaddr(lwp, fp, frame);
+            }
+            else
+            {
+                rc = -RT_EFAULT;
+            }
+        }
+        else
+#endif
+        if ((rt_kmem_v2p(fp) != ARCH_MAP_FAILED))
+        {
+            rc = _bt_kaddr(fp, frame);
+        }
+        else
+        {
+            rc = -RT_EINVAL;
         }
-
-        ra = fp - 1;
-        if (!rt_kmem_v2p(ra) || *ra < vas || *ra > vae)
-            break;
-
-        rt_kprintf(" %p", *ra - 0x04);
-
-        fp = fp - 2;
-        if (!rt_kmem_v2p(fp))
-            break;
-        fp = (rt_ubase_t *)(*fp);
-        if (!fp)
-            break;
     }
-
-    rt_kputs("\r\n");
-}
-
-static void _assert_backtrace_cb(const char *ex, const char *func, rt_size_t line)
-{
-    rt_hw_interrupt_disable();
-    rt_kprintf("(%s) assertion failed at function:%s, line number:%d \n", ex, func, line);
-
-    rt_hw_backtrace(0, 0);
-    rt_hw_cpu_shutdown();
-}
-
-static int rt_hw_backtrace_init(void)
-{
-    rt_assert_set_hook(_assert_backtrace_cb);
-
-    return 0;
+    else
+    {
+        rc = -RT_EFAULT;
+    }
+    return rc;
 }
-INIT_BOARD_EXPORT(rt_hw_backtrace_init);
 
-static void backtrace_test(int args, char *argv[])
+rt_err_t rt_hw_backtrace_frame_get(rt_thread_t thread, struct rt_hw_backtrace_frame *frame)
 {
-    int *p = (void *)-1;
-    init_fn_t ft = 0;
+    rt_err_t rc;
 
-    if (args < 2)
+    if (!thread || !frame)
     {
-        rt_kprintf("backtrace_test usage:backtrace_test a(assert)/m(invalid memory)/i(illegal instruction)\r\n");
-        return;
-    }
-
-    if (!rt_strcmp(argv[1], "a"))
-    {
-        rt_kprintf("Assert test:\r\n", argv[1]);
-        RT_ASSERT(0);
-    }
-    else if (!rt_strcmp(argv[1], "m"))
-    {
-        rt_kprintf("Access invalid memory:\r\n", argv[1]);
-        *p = 0;
-    }
-    else if (!rt_strcmp(argv[1], "i"))
-    {
-        rt_kprintf("Illegal instruction:\r\n", argv[1]);
-        ft();
+        rc = -RT_EINVAL;
     }
     else
     {
-        rt_kprintf("Unknown cmd :%s.\r\n", argv[1]);
+        rt_hw_switch_frame_t sframe = thread->sp;
+        frame->pc = sframe->regs[RT_HW_SWITCH_CONTEXT_RA];
+        frame->fp = sframe->regs[RT_HW_SWITCH_CONTEXT_S0];;
+        rc = RT_EOK;
     }
+    return rc;
 }
-MSH_CMD_EXPORT(backtrace_test, backtrace test case);
-
-extern struct rt_thread *rt_current_thread;
-
-#define IN_USERSPACE (sepc > USER_VADDR_START && sepc < USER_VADDR_TOP)
-
-static char *_get_elf_name(size_t sepc)
-{
-    return IN_USERSPACE ? rt_current_thread->parent.name : "rtthread.elf";
-}
-
-#endif /* RT_USING_SMART */

+ 72 - 17
libcpu/risc-v/t-head/c906/context_gcc.S

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2021, RT-Thread Development Team
+ * Copyright (c) 2006-2024, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -8,12 +8,72 @@
  * 2018/10/28     Bernard      The unify RISC-V porting implementation
  * 2018/12/27     Jesven       Add SMP support
  * 2021/02/02     lizhirui     Add userspace support
+ * 2022/10/22     Shell        Support User mode RVV;
+ *                             Trimming process switch context
  */
 
 #include "cpuport.h"
 #include "stackframe.h"
+#define _REG_IDX(name) RT_HW_SWITCH_CONTEXT_##name
+#define REG_IDX(name) _REG_IDX(name)
 
-    .globl rt_hw_context_switch_to
+.macro SAVE_REG reg, index
+    STORE \reg, \index*REGBYTES(sp)
+.endm
+
+.macro LOAD_REG reg, index
+    LOAD \reg, \index*REGBYTES(sp)
+.endm
+
+.macro RESERVE_CONTEXT
+    addi        sp, sp, -(RT_HW_SWITCH_CONTEXT_SIZE * REGBYTES)
+    SAVE_REG    tp, REG_IDX(TP)
+    SAVE_REG    ra, REG_IDX(RA)
+    SAVE_REG    s0, REG_IDX(S0)
+    SAVE_REG    s1, REG_IDX(S1)
+    SAVE_REG    s2, REG_IDX(S2)
+    SAVE_REG    s3, REG_IDX(S3)
+    SAVE_REG    s4, REG_IDX(S4)
+    SAVE_REG    s5, REG_IDX(S5)
+    SAVE_REG    s6, REG_IDX(S6)
+    SAVE_REG    s7, REG_IDX(S7)
+    SAVE_REG    s8, REG_IDX(S8)
+    SAVE_REG    s9, REG_IDX(S9)
+    SAVE_REG    s10, REG_IDX(S10)
+    SAVE_REG    s11, REG_IDX(S11)
+    csrr        s11, sstatus
+    li          s10, (SSTATUS_SPP)
+    or          s11, s11, s10
+    SAVE_REG    s11, REG_IDX(SSTATUS)
+.endm
+
+.macro RESTORE_CONTEXT
+    LOAD_REG    s11, REG_IDX(SSTATUS)
+    csrw        sstatus, s11
+    LOAD_REG    s11, REG_IDX(S11)
+    LOAD_REG    s10, REG_IDX(S10)
+    LOAD_REG    s9, REG_IDX(S9)
+    LOAD_REG    s8, REG_IDX(S8)
+    LOAD_REG    s7, REG_IDX(S7)
+    LOAD_REG    s6, REG_IDX(S6)
+    LOAD_REG    s5, REG_IDX(S5)
+    LOAD_REG    s4, REG_IDX(S4)
+    LOAD_REG    s3, REG_IDX(S3)
+    LOAD_REG    s2, REG_IDX(S2)
+    LOAD_REG    s1, REG_IDX(S1)
+    LOAD_REG    s0, REG_IDX(S0)
+    LOAD_REG    ra, REG_IDX(RA)
+    LOAD_REG    tp, REG_IDX(TP)
+    addi        sp, sp, RT_HW_SWITCH_CONTEXT_SIZE * REGBYTES
+    csrw        sepc, ra
+.endm
+
+/*
+ * void rt_hw_context_switch_to(rt_ubase_t to);
+ *
+ * a0 --> to SP pointer
+ */
+.globl rt_hw_context_switch_to
 rt_hw_context_switch_to:
     LOAD sp, (a0)
 
@@ -24,31 +84,26 @@ rt_hw_context_switch_to:
         jal lwp_aspace_switch
     #endif
 
-    RESTORE_ALL
+    RESTORE_CONTEXT
     sret
 
 /*
  * void rt_hw_context_switch(rt_ubase_t from, rt_ubase_t to);
  *
- * a0 --> from
- * a1 --> to
+ * a0 --> from SP pointer
+ * a1 --> to SP pointer
+ *
+ * It should only be used on local interrupt disable
  */
-    .globl rt_hw_context_switch
+.globl rt_hw_context_switch
 rt_hw_context_switch:
-    mv t2, sp
-    li t0, 0x120//set SPIE and SPP = 1
-    csrs sstatus, t0//if enter here,caller must be in system thread
-    csrw sepc, ra//return address
-    //saved from thread context
-    SAVE_ALL
-
-    STORE t2, 32 * REGBYTES(sp)//save user_sp
-
+    RESERVE_CONTEXT
     STORE sp, (a0)
 
-    //restore to thread context
+    // restore to thread SP
     LOAD sp, (a1)
 
+    // restore Address Space
     jal rt_thread_self
     mv s1, a0
 
@@ -56,5 +111,5 @@ rt_hw_context_switch:
         jal lwp_aspace_switch
     #endif
 
-    RESTORE_ALL
+    RESTORE_CONTEXT
     sret

+ 51 - 45
libcpu/risc-v/t-head/c906/cpuport.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2021, RT-Thread Development Team
+ * Copyright (c) 2006-2024, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -14,9 +14,15 @@
 #include <rtthread.h>
 
 #include "cpuport.h"
-#include "sbi.h"
 #include "stack.h"
+#include <sbi.h>
+#include <encoding.h>
 
+#ifdef ARCH_RISCV_FPU
+    #define K_SSTATUS_DEFAULT (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM | SSTATUS_FS)
+#else
+    #define K_SSTATUS_DEFAULT (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM)
+#endif
 #ifdef RT_USING_SMART
 #include <lwp_arch.h>
 #endif
@@ -25,22 +31,42 @@
  * @brief from thread used interrupt context switch
  *
  */
-volatile rt_ubase_t  rt_interrupt_from_thread = 0;
+volatile rt_ubase_t rt_interrupt_from_thread = 0;
 /**
  * @brief to thread used interrupt context switch
  *
  */
-volatile rt_ubase_t  rt_interrupt_to_thread   = 0;
+volatile rt_ubase_t rt_interrupt_to_thread = 0;
 /**
  * @brief flag to indicate context switch in interrupt or not
  *
  */
 volatile rt_ubase_t rt_thread_switch_interrupt_flag = 0;
 
+void *_rt_hw_stack_init(rt_ubase_t *sp, rt_ubase_t ra, rt_ubase_t sstatus)
+{
+    rt_hw_switch_frame_t frame = (rt_hw_switch_frame_t)
+        ((rt_ubase_t)sp - sizeof(struct rt_hw_switch_frame));
+
+    rt_memset(frame, 0, sizeof(struct rt_hw_switch_frame));
+
+    frame->regs[RT_HW_SWITCH_CONTEXT_RA] = ra;
+    frame->regs[RT_HW_SWITCH_CONTEXT_SSTATUS] = sstatus;
+
+    return (void *)frame;
+}
+
+int rt_hw_cpu_id(void)
+{
+    return 0;
+}
 
 /**
- * This function will initialize thread stack
+ * This function will initialize the thread stack; we assume that
+ * when the scheduler restores this new thread, the context restore
+ * will enter the thread's first application entry
  *
+ * s0-s11, ra, sstatus, a0
  * @param tentry the entry of thread
  * @param parameter the parameter of entry
  * @param stack_addr the beginning stack address
@@ -48,41 +74,23 @@ volatile rt_ubase_t rt_thread_switch_interrupt_flag = 0;
  *
  * @return stack address
  */
-rt_uint8_t *rt_hw_stack_init(void       *tentry,
-                             void       *parameter,
+rt_uint8_t *rt_hw_stack_init(void *tentry,
+                             void *parameter,
                              rt_uint8_t *stack_addr,
-                             void       *texit)
+                             void *texit)
 {
-    struct rt_hw_stack_frame *frame;
-    rt_uint8_t         *stk;
-    int                i;
-    extern int __global_pointer$;
-
-    stk  = stack_addr + sizeof(rt_ubase_t);
-    stk  = (rt_uint8_t *)RT_ALIGN_DOWN((rt_ubase_t)stk, REGBYTES);
-    stk -= sizeof(struct rt_hw_stack_frame);
-
-    frame = (struct rt_hw_stack_frame *)stk;
-
-    for (i = 0; i < sizeof(struct rt_hw_stack_frame) / sizeof(rt_ubase_t); i++)
-    {
-        ((rt_ubase_t *)frame)[i] = 0xdeadbeef;
-    }
-
-    frame->ra      = (rt_ubase_t)texit;
-    frame->gp      = (rt_ubase_t)&__global_pointer$;
-    frame->a0      = (rt_ubase_t)parameter;
-    frame->epc     = (rt_ubase_t)tentry;
-    frame->user_sp_exc_stack = (rt_ubase_t)(((rt_ubase_t)stk) + sizeof(struct rt_hw_stack_frame));
-
-    /* force to supervisor mode(SPP=1) and set SPIE and SUM to 1 */
-#ifdef ARCH_RISCV_FPU
-    frame->sstatus = 0x00046120;    /* enable FPU */
-#else
-    frame->sstatus = 0x00040120;
-#endif
-
-    return stk;
+    rt_ubase_t *sp = (rt_ubase_t *)stack_addr;
+    // we use a strict alignment requirement for Q extension
+    sp = (rt_ubase_t *)RT_ALIGN_DOWN((rt_ubase_t)sp, 16);
+
+    (*--sp) = (rt_ubase_t)tentry;
+    (*--sp) = (rt_ubase_t)parameter;
+    (*--sp) = (rt_ubase_t)texit;
+    --sp;   /* alignment */
+
+    /* compatible to RESTORE_CONTEXT */
+    extern void _rt_thread_entry(void);
+    return (rt_uint8_t *)_rt_hw_stack_init(sp, (rt_ubase_t)_rt_thread_entry, K_SSTATUS_DEFAULT);
 }
 
 /*
@@ -101,7 +109,7 @@ void rt_hw_context_switch_interrupt(rt_ubase_t from, rt_ubase_t to, rt_thread_t
     rt_interrupt_to_thread = to;
     rt_thread_switch_interrupt_flag = 1;
 
-    return ;
+    return;
 }
 #endif /* end of RT_USING_SMP */
 
@@ -112,16 +120,14 @@ void rt_hw_cpu_shutdown(void)
     rt_kprintf("shutdown...\n");
 
     level = rt_hw_interrupt_disable();
+
     sbi_shutdown();
-    while (1);
-}
 
-int rt_hw_cpu_id(void)
-{
-    return 0;   /* d1 has one core */
+    while (1)
+        ;
 }
 
 void rt_hw_set_process_id(int pid)
 {
-    //TODO
+    // TODO
 }

+ 9 - 21
libcpu/risc-v/t-head/c906/cpuport.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2021, RT-Thread Development Team
+ * Copyright (c) 2006-2024, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -14,28 +14,16 @@
 #include <rtconfig.h>
 #include <opcode.h>
 
-/* bytes of register width  */
-#ifdef ARCH_CPU_64BIT
-#define STORE                   sd
-#define LOAD                    ld
-#define REGBYTES                8
-#else
-// error here, not portable
+#ifdef RT_USING_SMP
+typedef union {
+    unsigned long slock;
+    struct __arch_tickets {
+        unsigned short owner;
+        unsigned short next;
+    } tickets;
+} rt_hw_spinlock_t;
 #endif
 
-/* 33 general register */
-#define CTX_GENERAL_REG_NR  33
-
-#ifdef ARCH_RISCV_FPU
-/* 32 fpu register */
-#define CTX_FPU_REG_NR  32
-#else
-#define CTX_FPU_REG_NR  0
-#endif
-
-/* all context registers */
-#define CTX_REG_NR  (CTX_GENERAL_REG_NR + CTX_FPU_REG_NR)
-
 #ifndef __ASSEMBLY__
 #include <rtdef.h>
 

+ 25 - 0
libcpu/risc-v/t-head/c906/cpuport_gcc.S

@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2006-2022, RT-Thread Development Team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2022-10-19     RT-Thread    the first version
+ */
+
+#include "cpuport.h"
+#include "stackframe.h"
+#include "asm-generic.h"
+
+START_POINT(_rt_thread_entry)
+    LOAD    ra, REGBYTES(sp)    /* thread exit */
+    addi    sp, sp, 2 * REGBYTES
+    LOAD    a0, (sp)            /* parameter */
+    LOAD    t0, REGBYTES(sp)    /* tentry */
+    addi    sp, sp, 2 * REGBYTES
+    mv      s1, ra
+    jalr    t0
+    jalr    s1
+    j       .           /* never here */
+START_POINT_END(_rt_thread_entry)

+ 17 - 0
libcpu/risc-v/t-head/c906/encoding.h

@@ -1,8 +1,25 @@
+/*
+ * Copyright (c) 2006-2022, RT-Thread Development Team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ */
 // See LICENSE for license details.
 
 #ifndef RISCV_CSR_ENCODING_H
 #define RISCV_CSR_ENCODING_H
 
+#include <rtconfig.h>
+
+#ifdef ARCH_RISCV_VECTOR
+#include "vector_encoding.h"
+
+#else
+#define SSTATUS_VS          0   /* fallback */
+#endif /* ARCH_RISCV_VECTOR */
+
 #define MSTATUS_UIE         0x00000001
 #define MSTATUS_SIE         0x00000002
 #define MSTATUS_HIE         0x00000004

+ 73 - 0
libcpu/risc-v/t-head/c906/ext_context.h

@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2006-2024, RT-Thread Development Team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2022-10-10     RT-Thread    the first version
+ */
+#ifndef __EXT_CONTEXT_H__
+#define __EXT_CONTEXT_H__
+
+#include <rtconfig.h>
+
+#ifdef ARCH_RISCV_FPU
+/* 32 fpu register */
+#define CTX_FPU_REG_NR  32
+#else
+#define CTX_FPU_REG_NR  0
+#endif /* ARCH_RISCV_FPU */
+
+#ifdef __ASSEMBLY__
+
+/**
+ * ==================================
+ * RISC-V D ISA (Floating)
+ * ==================================
+ */
+
+#ifdef ARCH_RISCV_FPU
+#define FPU_CTX_F0_OFF  (REGBYTES * 0)  /* offsetof(fpu_context_t, fpustatus.f[0])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F1_OFF  (REGBYTES * 1)  /* offsetof(fpu_context_t, fpustatus.f[1])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F2_OFF  (REGBYTES * 2)  /* offsetof(fpu_context_t, fpustatus.f[2])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F3_OFF  (REGBYTES * 3)  /* offsetof(fpu_context_t, fpustatus.f[3])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F4_OFF  (REGBYTES * 4)  /* offsetof(fpu_context_t, fpustatus.f[4])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F5_OFF  (REGBYTES * 5)  /* offsetof(fpu_context_t, fpustatus.f[5])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F6_OFF  (REGBYTES * 6)  /* offsetof(fpu_context_t, fpustatus.f[6])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F7_OFF  (REGBYTES * 7)  /* offsetof(fpu_context_t, fpustatus.f[7])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F8_OFF  (REGBYTES * 8)  /* offsetof(fpu_context_t, fpustatus.f[8])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F9_OFF  (REGBYTES * 9)  /* offsetof(fpu_context_t, fpustatus.f[9])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F10_OFF (REGBYTES * 10) /* offsetof(fpu_context_t, fpustatus.f[10]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F11_OFF (REGBYTES * 11) /* offsetof(fpu_context_t, fpustatus.f[11]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F12_OFF (REGBYTES * 12) /* offsetof(fpu_context_t, fpustatus.f[12]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F13_OFF (REGBYTES * 13) /* offsetof(fpu_context_t, fpustatus.f[13]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F14_OFF (REGBYTES * 14) /* offsetof(fpu_context_t, fpustatus.f[14]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F15_OFF (REGBYTES * 15) /* offsetof(fpu_context_t, fpustatus.f[15]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F16_OFF (REGBYTES * 16) /* offsetof(fpu_context_t, fpustatus.f[16]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F17_OFF (REGBYTES * 17) /* offsetof(fpu_context_t, fpustatus.f[17]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F18_OFF (REGBYTES * 18) /* offsetof(fpu_context_t, fpustatus.f[18]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F19_OFF (REGBYTES * 19) /* offsetof(fpu_context_t, fpustatus.f[19]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F20_OFF (REGBYTES * 20) /* offsetof(fpu_context_t, fpustatus.f[20]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F21_OFF (REGBYTES * 21) /* offsetof(fpu_context_t, fpustatus.f[21]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F22_OFF (REGBYTES * 22) /* offsetof(fpu_context_t, fpustatus.f[22]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F23_OFF (REGBYTES * 23) /* offsetof(fpu_context_t, fpustatus.f[23]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F24_OFF (REGBYTES * 24) /* offsetof(fpu_context_t, fpustatus.f[24]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F25_OFF (REGBYTES * 25) /* offsetof(fpu_context_t, fpustatus.f[25]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F26_OFF (REGBYTES * 26) /* offsetof(fpu_context_t, fpustatus.f[26]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F27_OFF (REGBYTES * 27) /* offsetof(fpu_context_t, fpustatus.f[27]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F28_OFF (REGBYTES * 28) /* offsetof(fpu_context_t, fpustatus.f[28]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F29_OFF (REGBYTES * 29) /* offsetof(fpu_context_t, fpustatus.f[29]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F30_OFF (REGBYTES * 30) /* offsetof(fpu_context_t, fpustatus.f[30]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F31_OFF (REGBYTES * 31) /* offsetof(fpu_context_t, fpustatus.f[31]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#endif /* ARCH_RISCV_FPU */
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef ARCH_RISCV_VECTOR
+#include "rvv_context.h"
+#else /* !ARCH_RISCV_VECTOR */
+#define CTX_VECTOR_REG_NR  0
+#endif /* ARCH_RISCV_VECTOR */
+
+#endif /* __EXT_CONTEXT_H__ */

+ 58 - 80
libcpu/risc-v/t-head/c906/interrupt_gcc.S

@@ -9,103 +9,81 @@
  * 2018/12/27     Jesven       Add SMP schedule
  * 2021/02/02     lizhirui     Add userspace support
  * 2021/12/24     JasonHu      Add user setting save/restore
+ * 2022/10/22     Shell        Support kernel mode RVV;
+ *                             Rewrite trap handling routine
  */
 
 #include "cpuport.h"
 #include "encoding.h"
 #include "stackframe.h"
 
-  .section      .text.entry
-  .align 2
-  .global trap_entry
-  .extern __stack_cpu0
-  .extern get_current_thread_kernel_stack_top
+    .align 2
+    .global trap_entry
+    .global debug_check_sp
 trap_entry:
-    //backup sp
-    csrrw sp, sscratch, sp
-    //load interrupt stack
-    la sp, __stack_cpu0
-    //backup context
+    // distinguish exception from kernel or user
+    csrrw   sp, sscratch, sp
+    bnez    sp, _save_context
+
+    // BE REALLY careful with sscratch:
+    // if it's wrong, we could end up looping here forever,
+    // or accessing random memory and only seeing things go totally
+    // messy after a long time, without even knowing why
+_from_kernel:
+    csrr    sp, sscratch
+    j _save_context
+
+_save_context:
     SAVE_ALL
-    
-    RESTORE_SYS_GP
-
-#ifdef RT_USING_SMART
-    //check syscall
-    csrr t0, scause
-    li t1, 8//environment call from u-mode
-    beq t0, t1, syscall_entry
-#endif
-
-    csrr a0, scause
-    csrrc a1, stval, zero
-    csrr  a2, sepc
-    mv    a3, sp
-
-    /* scause, stval, sepc, sp */
-    call  handle_trap
-
-    /* need to switch new thread */
-    la    s0, rt_thread_switch_interrupt_flag
-    lw    s2, 0(s0)
-    beqz  s2, spurious_interrupt
-    sw    zero, 0(s0)
+    // clear sscratch to say 'now in kernel mode'
+    csrw    sscratch, zero
 
-.global rt_hw_context_switch_interrupt_do
-rt_hw_context_switch_interrupt_do:
+    RESTORE_SYS_GP
 
+    // now we are ready to enter the interrupt / exception handler
+_distinguish_syscall:
+    csrr    t0, scause
 #ifdef RT_USING_SMART
-//swap to thread kernel stack
-    csrr t0, sstatus
-    andi t0, t0, 0x100
-    beqz t0, __restore_sp_from_tcb_interrupt
+    // TODO swap 8 with config macro name
+    li      t1, 8
+    beq     t0, t1, syscall_entry
+    // syscall never returns here
 #endif
 
-__restore_sp_from_sscratch_interrupt:
-    csrr t0, sscratch
-    j __move_stack_context_interrupt
-
+_handle_interrupt_and_exception:
+    mv      a0, t0
+    csrrc   a1, stval, zero
+    csrr    a2, sepc
+    // sp as exception frame pointer
+    mv      a3, sp
+    call    handle_trap
+
+_interrupt_exit:
+    la      s0, rt_thread_switch_interrupt_flag
+    lw      s2, 0(s0)
+    beqz    s2, _resume_execution
+    sw      zero, 0(s0)
+
+_context_switch:
+    la      t0, rt_interrupt_from_thread
+    LOAD    a0, 0(t0)
+    la      t0, rt_interrupt_to_thread
+    LOAD    a1, 0(t0)
+    csrr    t0, sstatus
+    andi    t0, t0, ~SSTATUS_SPIE
+    csrw    sstatus, t0
+    jal     rt_hw_context_switch
+
+_resume_execution:
 #ifdef RT_USING_SMART
-__restore_sp_from_tcb_interrupt:
-    la    s0, rt_interrupt_from_thread
-    LOAD  a0, 0(s0)
-    jal rt_thread_sp_to_thread
-    jal get_thread_kernel_stack_top
-    mv t0, a0
+    LOAD    t0, FRAME_OFF_SSTATUS(sp)
+    andi    t0, t0, SSTATUS_SPP
+    beqz    t0, arch_ret_to_user
 #endif
 
-__move_stack_context_interrupt:
-    mv t1, sp//src
-    mv sp, t0//switch stack
-    addi sp, sp, -CTX_REG_NR * REGBYTES
-    //copy context
-    li s0, CTX_REG_NR//cnt
-    mv t2, sp//dst
-
-copy_context_loop_interrupt:
-    LOAD t0, 0(t1)
-    STORE t0, 0(t2)
-    addi s0, s0, -1
-    addi t1, t1, 8
-    addi t2, t2, 8
-    bnez s0, copy_context_loop_interrupt
-
-    la    s0, rt_interrupt_from_thread
-    LOAD  s1, 0(s0)
-    STORE sp, 0(s1)
-
-    la    s0, rt_interrupt_to_thread
-    LOAD  s1, 0(s0)
-    LOAD  sp, 0(s1)
-
-    #ifdef RT_USING_SMART
-        mv a0, s1
-        jal rt_thread_sp_to_thread
-        jal lwp_aspace_switch
-    #endif
-
-spurious_interrupt:
+_resume_kernel:
     RESTORE_ALL
+    csrw    sscratch, zero
     sret
 
 .global rt_hw_interrupt_enable

+ 14 - 1
libcpu/risc-v/t-head/c906/stack.h

@@ -7,13 +7,22 @@
  * Date           Author       Notes
  * 2021-01-30     lizhirui     first version
  * 2021-11-18     JasonHu      add fpu member
+ * 2022-10-22     Shell        Support kernel mode RVV
  */
 
 #ifndef __STACK_H__
 #define __STACK_H__
 
+#include "stackframe.h"
+
 #include <rtthread.h>
 
+typedef struct rt_hw_switch_frame
+{
+    uint64_t regs[RT_HW_SWITCH_CONTEXT_SIZE];
+} *rt_hw_switch_frame_t;
+
+
 struct rt_hw_stack_frame
 {
     rt_ubase_t epc;        /* epc - epc    - program counter                     */
@@ -49,9 +58,13 @@ struct rt_hw_stack_frame
     rt_ubase_t t5;         /* x30 - t5     - temporary register 5                */
     rt_ubase_t t6;         /* x31 - t6     - temporary register 6                */
     rt_ubase_t user_sp_exc_stack;    /* sscratch - user mode sp/exception stack  */
+    rt_ubase_t __padding;  /* align to 16bytes */
 #ifdef ARCH_RISCV_FPU
     rt_ubase_t f[CTX_FPU_REG_NR];      /* f0~f31 */
-#endif
+#endif /* ARCH_RISCV_FPU */
+#ifdef ARCH_RISCV_VECTOR
+    rt_ubase_t v[CTX_VECTOR_REG_NR];
+#endif /* ARCH_RISCV_VECTOR */
 };
 
 #endif

+ 150 - 116
libcpu/risc-v/t-head/c906/stackframe.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2021, RT-Thread Development Team
+ * Copyright (c) 2006-2024, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -8,61 +8,59 @@
  * 2021-02-02     lizhirui     first version
  * 2021-02-11     lizhirui     fixed gp save/store bug
  * 2021-11-18     JasonHu      add fpu registers save/restore
+ * 2022-10-22     Shell        Support kernel mode RVV
  */
 
 #ifndef __STACKFRAME_H__
 #define __STACKFRAME_H__
 
+#include <rtconfig.h>
+#include "encoding.h"
+#include "ext_context.h"
+
+/* bytes of register width */
+#ifdef ARCH_CPU_64BIT
+#define STORE                   sd
+#define LOAD                    ld
+#define FSTORE                  fsd
+#define FLOAD                   fld
+#define REGBYTES                8
+#else
+// error here, not portable
+#error "Not supported XLEN"
+#endif
+
+/* 33 general registers + 1 padding */
+#define CTX_GENERAL_REG_NR  34
+
+/* all context registers */
+#define CTX_REG_NR  (CTX_GENERAL_REG_NR + CTX_FPU_REG_NR + CTX_VECTOR_REG_NR)
+
 #define BYTES(idx)          ((idx) * REGBYTES)
 #define FRAME_OFF_SSTATUS   BYTES(2)
 #define FRAME_OFF_SP        BYTES(32)
 #define FRAME_OFF_GP        BYTES(3)
 
-#include "cpuport.h"
-#include "encoding.h"
-
-#ifdef ARCH_RISCV_FPU
-#define FPU_CTX_F0_OFF   0   /* offsetof(fpu_context_t, fpustatus.f[0])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F1_OFF   8   /* offsetof(fpu_context_t, fpustatus.f[1])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F2_OFF   16  /* offsetof(fpu_context_t, fpustatus.f[2])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F3_OFF   24  /* offsetof(fpu_context_t, fpustatus.f[3])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F4_OFF   32  /* offsetof(fpu_context_t, fpustatus.f[4])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F5_OFF   40  /* offsetof(fpu_context_t, fpustatus.f[5])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F6_OFF   48  /* offsetof(fpu_context_t, fpustatus.f[6])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F7_OFF   56  /* offsetof(fpu_context_t, fpustatus.f[7])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F8_OFF   64  /* offsetof(fpu_context_t, fpustatus.f[8])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F9_OFF   72  /* offsetof(fpu_context_t, fpustatus.f[9])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F10_OFF  80  /* offsetof(fpu_context_t, fpustatus.f[10]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F11_OFF  88  /* offsetof(fpu_context_t, fpustatus.f[11]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F12_OFF  96  /* offsetof(fpu_context_t, fpustatus.f[12]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F13_OFF  104 /* offsetof(fpu_context_t, fpustatus.f[13]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F14_OFF  112 /* offsetof(fpu_context_t, fpustatus.f[14]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F15_OFF  120 /* offsetof(fpu_context_t, fpustatus.f[15]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F16_OFF  128 /* offsetof(fpu_context_t, fpustatus.f[16]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F17_OFF  136 /* offsetof(fpu_context_t, fpustatus.f[17]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F18_OFF  144 /* offsetof(fpu_context_t, fpustatus.f[18]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F19_OFF  152 /* offsetof(fpu_context_t, fpustatus.f[19]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F20_OFF  160 /* offsetof(fpu_context_t, fpustatus.f[20]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F21_OFF  168 /* offsetof(fpu_context_t, fpustatus.f[21]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F22_OFF  176 /* offsetof(fpu_context_t, fpustatus.f[22]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F23_OFF  184 /* offsetof(fpu_context_t, fpustatus.f[23]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F24_OFF  192 /* offsetof(fpu_context_t, fpustatus.f[24]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F25_OFF  200 /* offsetof(fpu_context_t, fpustatus.f[25]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F26_OFF  208 /* offsetof(fpu_context_t, fpustatus.f[26]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F27_OFF  216 /* offsetof(fpu_context_t, fpustatus.f[27]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F28_OFF  224 /* offsetof(fpu_context_t, fpustatus.f[28]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F29_OFF  232 /* offsetof(fpu_context_t, fpustatus.f[29]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F30_OFF  240 /* offsetof(fpu_context_t, fpustatus.f[30]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F31_OFF  248 /* offsetof(fpu_context_t, fpustatus.f[31]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#endif /* ARCH_RISCV_FPU */
-
-/**
- * The register `tp` always save/restore when context switch,
- * we call `lwp_user_setting_save` when syscall enter,
- * call `lwp_user_setting_restore` when syscall exit
- * and modify context stack after `lwp_user_setting_restore` called
- * so that the `tp` can be the correct thread area value.
- */
+/* switch frame */
+#define RT_HW_SWITCH_CONTEXT_SSTATUS    0
+#define RT_HW_SWITCH_CONTEXT_S11        1
+#define RT_HW_SWITCH_CONTEXT_S10        2
+#define RT_HW_SWITCH_CONTEXT_S9         3
+#define RT_HW_SWITCH_CONTEXT_S8         4
+#define RT_HW_SWITCH_CONTEXT_S7         5
+#define RT_HW_SWITCH_CONTEXT_S6         6
+#define RT_HW_SWITCH_CONTEXT_S5         7
+#define RT_HW_SWITCH_CONTEXT_S4         8
+#define RT_HW_SWITCH_CONTEXT_S3         9
+#define RT_HW_SWITCH_CONTEXT_S2         10
+#define RT_HW_SWITCH_CONTEXT_S1         11
+#define RT_HW_SWITCH_CONTEXT_S0         12
+#define RT_HW_SWITCH_CONTEXT_RA         13
+#define RT_HW_SWITCH_CONTEXT_TP         14
+#define RT_HW_SWITCH_CONTEXT_ALIGNMENT  15  // Padding for alignment
+#define RT_HW_SWITCH_CONTEXT_SIZE       16  // Total size of the structure
+
+#ifdef __ASSEMBLY__
 
 .macro SAVE_ALL
 
@@ -70,16 +68,20 @@
     /* reserve float registers */
     addi sp, sp, -CTX_FPU_REG_NR * REGBYTES
 #endif /* ARCH_RISCV_FPU */
+#ifdef ARCH_RISCV_VECTOR
+    /* reserve vector registers */
+    addi sp, sp, -CTX_VECTOR_REG_NR * REGBYTES
+#endif /* ARCH_RISCV_VECTOR */
 
     /* save general registers */
     addi sp, sp, -CTX_GENERAL_REG_NR * REGBYTES
     STORE x1,   1 * REGBYTES(sp)
 
     csrr  x1, sstatus
-    STORE x1,   2 * REGBYTES(sp)
+    STORE x1, FRAME_OFF_SSTATUS(sp)
 
     csrr  x1, sepc
-    STORE x1, 0 * REGBYTES(sp)
+    STORE x1,   0 * REGBYTES(sp)
 
     STORE x3,   3 * REGBYTES(sp)
     STORE x4,   4 * REGBYTES(sp) /* save tp */
@@ -120,38 +122,38 @@
 
     li  t0, SSTATUS_FS
     csrs sstatus, t0
-    fsd f0,  FPU_CTX_F0_OFF(t1)
-    fsd f1,  FPU_CTX_F1_OFF(t1)
-    fsd f2,  FPU_CTX_F2_OFF(t1)
-    fsd f3,  FPU_CTX_F3_OFF(t1)
-    fsd f4,  FPU_CTX_F4_OFF(t1)
-    fsd f5,  FPU_CTX_F5_OFF(t1)
-    fsd f6,  FPU_CTX_F6_OFF(t1)
-    fsd f7,  FPU_CTX_F7_OFF(t1)
-    fsd f8,  FPU_CTX_F8_OFF(t1)
-    fsd f9,  FPU_CTX_F9_OFF(t1)
-    fsd f10, FPU_CTX_F10_OFF(t1)
-    fsd f11, FPU_CTX_F11_OFF(t1)
-    fsd f12, FPU_CTX_F12_OFF(t1)
-    fsd f13, FPU_CTX_F13_OFF(t1)
-    fsd f14, FPU_CTX_F14_OFF(t1)
-    fsd f15, FPU_CTX_F15_OFF(t1)
-    fsd f16, FPU_CTX_F16_OFF(t1)
-    fsd f17, FPU_CTX_F17_OFF(t1)
-    fsd f18, FPU_CTX_F18_OFF(t1)
-    fsd f19, FPU_CTX_F19_OFF(t1)
-    fsd f20, FPU_CTX_F20_OFF(t1)
-    fsd f21, FPU_CTX_F21_OFF(t1)
-    fsd f22, FPU_CTX_F22_OFF(t1)
-    fsd f23, FPU_CTX_F23_OFF(t1)
-    fsd f24, FPU_CTX_F24_OFF(t1)
-    fsd f25, FPU_CTX_F25_OFF(t1)
-    fsd f26, FPU_CTX_F26_OFF(t1)
-    fsd f27, FPU_CTX_F27_OFF(t1)
-    fsd f28, FPU_CTX_F28_OFF(t1)
-    fsd f29, FPU_CTX_F29_OFF(t1)
-    fsd f30, FPU_CTX_F30_OFF(t1)
-    fsd f31, FPU_CTX_F31_OFF(t1)
+    FSTORE f0,  FPU_CTX_F0_OFF(t1)
+    FSTORE f1,  FPU_CTX_F1_OFF(t1)
+    FSTORE f2,  FPU_CTX_F2_OFF(t1)
+    FSTORE f3,  FPU_CTX_F3_OFF(t1)
+    FSTORE f4,  FPU_CTX_F4_OFF(t1)
+    FSTORE f5,  FPU_CTX_F5_OFF(t1)
+    FSTORE f6,  FPU_CTX_F6_OFF(t1)
+    FSTORE f7,  FPU_CTX_F7_OFF(t1)
+    FSTORE f8,  FPU_CTX_F8_OFF(t1)
+    FSTORE f9,  FPU_CTX_F9_OFF(t1)
+    FSTORE f10, FPU_CTX_F10_OFF(t1)
+    FSTORE f11, FPU_CTX_F11_OFF(t1)
+    FSTORE f12, FPU_CTX_F12_OFF(t1)
+    FSTORE f13, FPU_CTX_F13_OFF(t1)
+    FSTORE f14, FPU_CTX_F14_OFF(t1)
+    FSTORE f15, FPU_CTX_F15_OFF(t1)
+    FSTORE f16, FPU_CTX_F16_OFF(t1)
+    FSTORE f17, FPU_CTX_F17_OFF(t1)
+    FSTORE f18, FPU_CTX_F18_OFF(t1)
+    FSTORE f19, FPU_CTX_F19_OFF(t1)
+    FSTORE f20, FPU_CTX_F20_OFF(t1)
+    FSTORE f21, FPU_CTX_F21_OFF(t1)
+    FSTORE f22, FPU_CTX_F22_OFF(t1)
+    FSTORE f23, FPU_CTX_F23_OFF(t1)
+    FSTORE f24, FPU_CTX_F24_OFF(t1)
+    FSTORE f25, FPU_CTX_F25_OFF(t1)
+    FSTORE f26, FPU_CTX_F26_OFF(t1)
+    FSTORE f27, FPU_CTX_F27_OFF(t1)
+    FSTORE f28, FPU_CTX_F28_OFF(t1)
+    FSTORE f29, FPU_CTX_F29_OFF(t1)
+    FSTORE f30, FPU_CTX_F30_OFF(t1)
+    FSTORE f31, FPU_CTX_F31_OFF(t1)
 
     /* clr FS domain */
     csrc sstatus, t0
@@ -162,49 +164,77 @@
 
 #endif /* ARCH_RISCV_FPU */
 
+#ifdef ARCH_RISCV_VECTOR
+    csrr    t0, sstatus
+    andi    t0, t0, SSTATUS_VS
+    beqz    t0, 0f
+
+    /* push vector frame */
+    addi t1, sp, (CTX_GENERAL_REG_NR + CTX_FPU_REG_NR) * REGBYTES
+
+    SAVE_VECTOR t1
+0:
+#endif /* ARCH_RISCV_VECTOR */
 .endm
 
+/**
+ * @brief Restore All General Registers, for interrupt handling
+ *
+ */
 .macro RESTORE_ALL
 
+#ifdef ARCH_RISCV_VECTOR
+    // skip restore when the vector unit is off
+    ld      t0, 2 * REGBYTES(sp)
+    // VS=Initial has no saved vector context; restore only for Clean/Dirty (both have this bit set)
+    andi    t0, t0, SSTATUS_VS_CLEAN
+    beqz    t0, 0f
+
+    /* locate vector frame to restore from */
+    addi t1, sp, (CTX_GENERAL_REG_NR + CTX_FPU_REG_NR) * REGBYTES
+
+    RESTORE_VECTOR t1
+0:
+#endif /* ARCH_RISCV_VECTOR */
+
 #ifdef ARCH_RISCV_FPU
     /* restore float register  */
-    mv t2, sp
-    addi t2, t2, CTX_GENERAL_REG_NR * REGBYTES   /* skip all normal reg */
+    addi t2, sp, CTX_GENERAL_REG_NR * REGBYTES
 
     li  t0, SSTATUS_FS
     csrs sstatus, t0
-    fld f0,  FPU_CTX_F0_OFF(t2)
-    fld f1,  FPU_CTX_F1_OFF(t2)
-    fld f2,  FPU_CTX_F2_OFF(t2)
-    fld f3,  FPU_CTX_F3_OFF(t2)
-    fld f4,  FPU_CTX_F4_OFF(t2)
-    fld f5,  FPU_CTX_F5_OFF(t2)
-    fld f6,  FPU_CTX_F6_OFF(t2)
-    fld f7,  FPU_CTX_F7_OFF(t2)
-    fld f8,  FPU_CTX_F8_OFF(t2)
-    fld f9,  FPU_CTX_F9_OFF(t2)
-    fld f10, FPU_CTX_F10_OFF(t2)
-    fld f11, FPU_CTX_F11_OFF(t2)
-    fld f12, FPU_CTX_F12_OFF(t2)
-    fld f13, FPU_CTX_F13_OFF(t2)
-    fld f14, FPU_CTX_F14_OFF(t2)
-    fld f15, FPU_CTX_F15_OFF(t2)
-    fld f16, FPU_CTX_F16_OFF(t2)
-    fld f17, FPU_CTX_F17_OFF(t2)
-    fld f18, FPU_CTX_F18_OFF(t2)
-    fld f19, FPU_CTX_F19_OFF(t2)
-    fld f20, FPU_CTX_F20_OFF(t2)
-    fld f21, FPU_CTX_F21_OFF(t2)
-    fld f22, FPU_CTX_F22_OFF(t2)
-    fld f23, FPU_CTX_F23_OFF(t2)
-    fld f24, FPU_CTX_F24_OFF(t2)
-    fld f25, FPU_CTX_F25_OFF(t2)
-    fld f26, FPU_CTX_F26_OFF(t2)
-    fld f27, FPU_CTX_F27_OFF(t2)
-    fld f28, FPU_CTX_F28_OFF(t2)
-    fld f29, FPU_CTX_F29_OFF(t2)
-    fld f30, FPU_CTX_F30_OFF(t2)
-    fld f31, FPU_CTX_F31_OFF(t2)
+    FLOAD f0,  FPU_CTX_F0_OFF(t2)
+    FLOAD f1,  FPU_CTX_F1_OFF(t2)
+    FLOAD f2,  FPU_CTX_F2_OFF(t2)
+    FLOAD f3,  FPU_CTX_F3_OFF(t2)
+    FLOAD f4,  FPU_CTX_F4_OFF(t2)
+    FLOAD f5,  FPU_CTX_F5_OFF(t2)
+    FLOAD f6,  FPU_CTX_F6_OFF(t2)
+    FLOAD f7,  FPU_CTX_F7_OFF(t2)
+    FLOAD f8,  FPU_CTX_F8_OFF(t2)
+    FLOAD f9,  FPU_CTX_F9_OFF(t2)
+    FLOAD f10, FPU_CTX_F10_OFF(t2)
+    FLOAD f11, FPU_CTX_F11_OFF(t2)
+    FLOAD f12, FPU_CTX_F12_OFF(t2)
+    FLOAD f13, FPU_CTX_F13_OFF(t2)
+    FLOAD f14, FPU_CTX_F14_OFF(t2)
+    FLOAD f15, FPU_CTX_F15_OFF(t2)
+    FLOAD f16, FPU_CTX_F16_OFF(t2)
+    FLOAD f17, FPU_CTX_F17_OFF(t2)
+    FLOAD f18, FPU_CTX_F18_OFF(t2)
+    FLOAD f19, FPU_CTX_F19_OFF(t2)
+    FLOAD f20, FPU_CTX_F20_OFF(t2)
+    FLOAD f21, FPU_CTX_F21_OFF(t2)
+    FLOAD f22, FPU_CTX_F22_OFF(t2)
+    FLOAD f23, FPU_CTX_F23_OFF(t2)
+    FLOAD f24, FPU_CTX_F24_OFF(t2)
+    FLOAD f25, FPU_CTX_F25_OFF(t2)
+    FLOAD f26, FPU_CTX_F26_OFF(t2)
+    FLOAD f27, FPU_CTX_F27_OFF(t2)
+    FLOAD f28, FPU_CTX_F28_OFF(t2)
+    FLOAD f29, FPU_CTX_F29_OFF(t2)
+    FLOAD f30, FPU_CTX_F30_OFF(t2)
+    FLOAD f31, FPU_CTX_F31_OFF(t2)
 
     /* clr FS domain */
     csrc sstatus, t0
@@ -216,9 +246,11 @@
 #endif /* ARCH_RISCV_FPU */
 
     /* restore general register */
+    addi t0, sp, CTX_REG_NR * REGBYTES
+    csrw sscratch, t0
 
     /* resw ra to sepc */
-    LOAD x1,   0 * REGBYTES(sp)
+    LOAD x1, 0 * REGBYTES(sp)
     csrw sepc, x1
 
     LOAD x1,   2 * REGBYTES(sp)
@@ -275,4 +307,6 @@
     csrci sstatus, 2
 .endm
 
-#endif
+#endif /* __ASSEMBLY__ */
+
+#endif /* __STACKFRAME_H__ */

+ 4 - 2
libcpu/risc-v/virt64/backtrace.c

@@ -15,6 +15,7 @@
 #include <rtthread.h>
 #include <mm_aspace.h>
 #include "riscv_mmu.h"
+#include "stack.h"
 
 #define WORD                            sizeof(rt_base_t)
 #define ARCH_CONTEXT_FETCH(pctx, id)    (*(((unsigned long *)pctx) + (id)))
@@ -117,8 +118,9 @@ rt_err_t rt_hw_backtrace_frame_get(rt_thread_t thread, struct rt_hw_backtrace_fr
     }
     else
     {
-        frame->pc = ARCH_CONTEXT_FETCH(thread->sp, 13);
-        frame->fp = ARCH_CONTEXT_FETCH(thread->sp, 12);
+        rt_hw_switch_frame_t sframe = thread->sp;
+        frame->pc = sframe->regs[RT_HW_SWITCH_CONTEXT_RA];
+        frame->fp = sframe->regs[RT_HW_SWITCH_CONTEXT_S0];
         rc = RT_EOK;
     }
     return rc;

+ 45 - 75
libcpu/risc-v/virt64/context_gcc.S

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2021, RT-Thread Development Team
+ * Copyright (c) 2006-2024, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -8,64 +8,64 @@
  * 2018/10/28     Bernard      The unify RISC-V porting implementation
  * 2018/12/27     Jesven       Add SMP support
  * 2021/02/02     lizhirui     Add userspace support
- * 2022/10/22     WangXiaoyao  Support User mode RVV;
+ * 2022/10/22     Shell        Support User mode RVV;
  *                             Trimming process switch context
  */
 
 #include "cpuport.h"
 #include "stackframe.h"
+#define _REG_IDX(name) RT_HW_SWITCH_CONTEXT_##name
+#define REG_IDX(name) _REG_IDX(name)
 
-.macro PUSH_REG reg
-    addi    sp, sp, -REGBYTES
-    STORE   \reg, (sp)
+.macro SAVE_REG reg, index
+    STORE \reg, \index*REGBYTES(sp)
 .endm
 
-.macro POP_REG reg
-    LOAD    \reg, (sp)
-    addi    sp, sp, REGBYTES
+.macro LOAD_REG reg, index
+    LOAD \reg, \index*REGBYTES(sp)
 .endm
 
 .macro RESERVE_CONTEXT
-    PUSH_REG  tp
-    PUSH_REG  ra
-    PUSH_REG  s0
-    PUSH_REG  s1
-    PUSH_REG  s2
-    PUSH_REG  s3
-    PUSH_REG  s4
-    PUSH_REG  s5
-    PUSH_REG  s6
-    PUSH_REG  s7
-    PUSH_REG  s8
-    PUSH_REG  s9
-    PUSH_REG  s10
-    PUSH_REG  s11
-    csrr    s11, sstatus
-    li      s10, (SSTATUS_SPP)
-    or      s11, s11, s10
-    PUSH_REG  s11
-    addi    sp, sp, -REGBYTES
+    addi        sp, sp, -(RT_HW_SWITCH_CONTEXT_SIZE * REGBYTES)
+    SAVE_REG    tp, REG_IDX(TP)
+    SAVE_REG    ra, REG_IDX(RA)
+    SAVE_REG    s0, REG_IDX(S0)
+    SAVE_REG    s1, REG_IDX(S1)
+    SAVE_REG    s2, REG_IDX(S2)
+    SAVE_REG    s3, REG_IDX(S3)
+    SAVE_REG    s4, REG_IDX(S4)
+    SAVE_REG    s5, REG_IDX(S5)
+    SAVE_REG    s6, REG_IDX(S6)
+    SAVE_REG    s7, REG_IDX(S7)
+    SAVE_REG    s8, REG_IDX(S8)
+    SAVE_REG    s9, REG_IDX(S9)
+    SAVE_REG    s10, REG_IDX(S10)
+    SAVE_REG    s11, REG_IDX(S11)
+    csrr        s11, sstatus
+    li          s10, (SSTATUS_SPP)
+    or          s11, s11, s10
+    SAVE_REG    s11, REG_IDX(SSTATUS)
 .endm
 
 .macro RESTORE_CONTEXT
-    addi    sp, sp, REGBYTES
-    POP_REG   s11
-    csrw    sstatus, s11
-    POP_REG   s11
-    POP_REG   s10
-    POP_REG   s9
-    POP_REG   s8
-    POP_REG   s7
-    POP_REG   s6
-    POP_REG   s5
-    POP_REG   s4
-    POP_REG   s3
-    POP_REG   s2
-    POP_REG   s1
-    POP_REG   s0
-    POP_REG   ra
-    POP_REG   tp
-    csrw    sepc, ra
+    LOAD_REG    s11, REG_IDX(SSTATUS)
+    csrw        sstatus, s11
+    LOAD_REG    s11, REG_IDX(S11)
+    LOAD_REG    s10, REG_IDX(S10)
+    LOAD_REG    s9, REG_IDX(S9)
+    LOAD_REG    s8, REG_IDX(S8)
+    LOAD_REG    s7, REG_IDX(S7)
+    LOAD_REG    s6, REG_IDX(S6)
+    LOAD_REG    s5, REG_IDX(S5)
+    LOAD_REG    s4, REG_IDX(S4)
+    LOAD_REG    s3, REG_IDX(S3)
+    LOAD_REG    s2, REG_IDX(S2)
+    LOAD_REG    s1, REG_IDX(S1)
+    LOAD_REG    s0, REG_IDX(S0)
+    LOAD_REG    ra, REG_IDX(RA)
+    LOAD_REG    tp, REG_IDX(TP)
+    addi        sp, sp, RT_HW_SWITCH_CONTEXT_SIZE * REGBYTES
+    csrw        sepc, ra
 .endm
 
 /*
@@ -113,33 +113,3 @@ rt_hw_context_switch:
 
     RESTORE_CONTEXT
     sret
-
-#ifdef ENABLE_VECTOR
-/**
- * @param a0 pointer to frame bottom
- */
-.global rt_hw_vector_ctx_save
-rt_hw_vector_ctx_save:
-    SAVE_VECTOR a0
-    ret
-
-/**
- * @param a0 pointer to frame bottom
- */
-.global rt_hw_vector_ctx_restore
-rt_hw_vector_ctx_restore:
-    RESTORE_VECTOR a0
-    ret
-
-.global rt_hw_disable_vector
-rt_hw_disable_vector:
-    li t0, SSTATUS_VS
-    csrc sstatus, t0
-    ret
-
-.global rt_hw_enable_vector
-rt_hw_enable_vector:
-    li t0, SSTATUS_VS
-    csrs sstatus, t0
-    ret
-#endif /* ENABLE_VECTOR */

+ 19 - 19
libcpu/risc-v/virt64/cpuport.c

@@ -7,6 +7,7 @@
  * Date           Author       Notes
  * 2018/10/28     Bernard      The unify RISC-V porting code.
  * 2021-02-11     lizhirui     add gp support
+ * 2021-11-19     JasonHu      add fpu support
  */
 
 #include <rthw.h>
@@ -17,7 +18,14 @@
 #include <sbi.h>
 #include <encoding.h>
 
-#define K_SSTATUS_DEFAULT (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM | SSTATUS_FS)
+#ifdef ARCH_RISCV_FPU
+    #define K_SSTATUS_DEFAULT (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM | SSTATUS_FS)
+#else
+    #define K_SSTATUS_DEFAULT (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM)
+#endif
+#ifdef RT_USING_SMART
+#include <lwp_arch.h>
+#endif
 
 /**
  * @brief from thread used interrupt context switch
@@ -37,24 +45,15 @@ volatile rt_ubase_t rt_thread_switch_interrupt_flag = 0;
 
 void *_rt_hw_stack_init(rt_ubase_t *sp, rt_ubase_t ra, rt_ubase_t sstatus)
 {
-    (*--sp) = 0;                                /* tp */
-    (*--sp) = ra;                               /* ra */
-    (*--sp) = 0;                                /* s0(fp) */
-    (*--sp) = 0;                                /* s1 */
-    (*--sp) = 0;                                /* s2 */
-    (*--sp) = 0;                                /* s3 */
-    (*--sp) = 0;                                /* s4 */
-    (*--sp) = 0;                                /* s5 */
-    (*--sp) = 0;                                /* s6 */
-    (*--sp) = 0;                                /* s7 */
-    (*--sp) = 0;                                /* s8 */
-    (*--sp) = 0;                                /* s9 */
-    (*--sp) = 0;                                /* s10 */
-    (*--sp) = 0;                                /* s11 */
-    (*--sp) = sstatus;                          /* sstatus */
-    --sp; /* align to 16bytes */
-
-    return (void *)sp;
+    rt_hw_switch_frame_t frame = (rt_hw_switch_frame_t)
+        ((rt_ubase_t)sp - sizeof(struct rt_hw_switch_frame));
+
+    rt_memset(frame, 0, sizeof(struct rt_hw_switch_frame));
+
+    frame->regs[RT_HW_SWITCH_CONTEXT_RA] = ra;
+    frame->regs[RT_HW_SWITCH_CONTEXT_SSTATUS] = sstatus;
+
+    return (void *)frame;
 }
 
 int rt_hw_cpu_id(void)
@@ -87,6 +86,7 @@ rt_uint8_t *rt_hw_stack_init(void *tentry,
     (*--sp) = (rt_ubase_t)tentry;
     (*--sp) = (rt_ubase_t)parameter;
     (*--sp) = (rt_ubase_t)texit;
+    --sp;   /* alignment */
 
     /* compatible to RESTORE_CONTEXT */
     extern void _rt_thread_entry(void);

+ 7 - 41
libcpu/risc-v/virt64/cpuport.h

@@ -12,44 +12,7 @@
 #define CPUPORT_H__
 
 #include <rtconfig.h>
-
-/* bytes of register width  */
-#ifdef ARCH_CPU_64BIT
-#define STORE                   sd
-#define LOAD                    ld
-#define FSTORE                  fsd
-#define FLOAD                   fld
-#define REGBYTES                8
-#else
-// error here, not portable
-#error "Not supported XLEN"
-#endif
-
-/* 33 general register + 1 padding */
-#define CTX_GENERAL_REG_NR  34
-
-#ifdef ENABLE_FPU
-/* 32 fpu register */
-#define CTX_FPU_REG_NR  32
-#else
-#define CTX_FPU_REG_NR  0
-#endif
-
-#ifdef ENABLE_VECTOR
-
-#if defined(ARCH_VECTOR_VLEN_128)
-#define CTX_VECTOR_REGS 64
-#elif defined(ARCH_VECTOR_VLEN_256)
-#define CTX_VECTOR_REGS 128
-#endif
-
-#define CTX_VECTOR_REG_NR  (CTX_VECTOR_REGS + 4)
-#else
-#define CTX_VECTOR_REG_NR  0
-#endif
-
-/* all context registers */
-#define CTX_REG_NR  (CTX_GENERAL_REG_NR + CTX_FPU_REG_NR + CTX_VECTOR_REG_NR)
+#include <opcode.h>
 
 #ifdef RT_USING_SMP
 typedef union {
@@ -63,21 +26,24 @@ typedef union {
 
 #ifndef __ASSEMBLY__
 #include <rtdef.h>
+
 rt_inline void rt_hw_dsb(void)
 {
-    asm volatile("fence":::"memory");
+    __asm__ volatile("fence":::"memory");
 }
 
 rt_inline void rt_hw_dmb(void)
 {
-    asm volatile("fence":::"memory");
+    __asm__ volatile("fence":::"memory");
 }
 
 rt_inline void rt_hw_isb(void)
 {
-    asm volatile(".long 0x0000100F":::"memory");
+    __asm__ volatile(OPC_FENCE_I:::"memory");
 }
 
+int rt_hw_cpu_id(void);
+
 #endif
 
 #endif

+ 5 - 6
libcpu/risc-v/virt64/cpuport_gcc.S

@@ -13,12 +13,11 @@
 #include "asm-generic.h"
 
 START_POINT(_rt_thread_entry)
-    LOAD    ra, (sp)    /* thread exit */
-    addi    sp, sp, REGBYTES
-    LOAD    a0, (sp)    /* parameter */
-    addi    sp, sp, REGBYTES
-    LOAD    t0, (sp)    /* tentry */
-    addi    sp, sp, REGBYTES
+    LOAD    ra, REGBYTES(sp)    /* thread exit */
+    addi    sp, sp, 2 * REGBYTES
+    LOAD    a0, (sp)            /* parameter */
+    LOAD    t0, REGBYTES(sp)    /* tentry */
+    addi    sp, sp, 2 * REGBYTES
     mv      s1, ra
     jalr    t0
     jalr    s1

+ 9 - 5
libcpu/risc-v/virt64/encoding.h

@@ -11,6 +11,15 @@
 #ifndef RISCV_CSR_ENCODING_H
 #define RISCV_CSR_ENCODING_H
 
+#include <rtconfig.h>
+
+#ifdef ARCH_RISCV_VECTOR
+#include "vector_encoding.h"
+
+#else
+#define SSTATUS_VS          0   /* fallback */
+#endif /* ARCH_RISCV_VECTOR */
+
 #define MSTATUS_UIE         0x00000001
 #define MSTATUS_SIE         0x00000002
 #define MSTATUS_HIE         0x00000004
@@ -23,7 +32,6 @@
 #define MSTATUS_HPP         0x00000600
 #define MSTATUS_MPP         0x00001800
 #define MSTATUS_FS          0x00006000
-#define MSTATUS_VS          0x00000600
 #define MSTATUS_XS          0x00018000
 #define MSTATUS_MPRV        0x00020000
 #define MSTATUS_PUM         0x00040000
@@ -41,10 +49,6 @@
 #define SSTATUS_FS_INITIAL  0x00002000
 #define SSTATUS_FS_CLEAN    0x00004000
 #define SSTATUS_FS_DIRTY    0x00006000
-#define SSTATUS_VS          0x00000600 /* Vector Status */
-#define SSTATUS_VS_INITIAL  0x00000200
-#define SSTATUS_VS_CLEAN    0x00000400
-#define SSTATUS_VS_DIRTY    0x00000600
 #define SSTATUS_XS          0x00018000
 #define SSTATUS_SUM         0x00040000
 #define SSTATUS32_SD        0x80000000

+ 18 - 100
libcpu/risc-v/virt64/ext_context.h

@@ -5,29 +5,29 @@
  *
  * Change Logs:
  * Date           Author       Notes
- * 2022-10-10     RT-Thread    the first version,
- *                             compatible to riscv-v-spec-1.0
+ * 2022-10-10     RT-Thread    the first version
  */
 #ifndef __EXT_CONTEXT_H__
 #define __EXT_CONTEXT_H__
 
-#ifdef __ASSEMBLY__
+#include <rtconfig.h>
 
-/**
- * extension context maintenance
- */
+#ifdef ARCH_RISCV_FPU
+/* 32 fpu register */
+#define CTX_FPU_REG_NR  32
+#else
+#define CTX_FPU_REG_NR  0
+#endif /* ARCH_RISCV_FPU */
 
-#include "cpuport.h"
-#include "encoding.h"
-#include "vector_encoding.h"
+#ifdef __ASSEMBLY__
 
 /**
  * ==================================
- * FPU EXTENSION
+ * RISC-V D ISA (Floating)
  * ==================================
  */
 
-#ifdef ENABLE_FPU
+#ifdef ARCH_RISCV_FPU
 #define FPU_CTX_F0_OFF  (REGBYTES * 0)  /* offsetof(fpu_context_t, fpustatus.f[0])  - offsetof(fpu_context_t, fpustatus.f[0]) */
 #define FPU_CTX_F1_OFF  (REGBYTES * 1)  /* offsetof(fpu_context_t, fpustatus.f[1])  - offsetof(fpu_context_t, fpustatus.f[0]) */
 #define FPU_CTX_F2_OFF  (REGBYTES * 2)  /* offsetof(fpu_context_t, fpustatus.f[2])  - offsetof(fpu_context_t, fpustatus.f[0]) */
@@ -60,96 +60,14 @@
 #define FPU_CTX_F29_OFF (REGBYTES * 29) /* offsetof(fpu_context_t, fpustatus.f[29]) - offsetof(fpu_context_t, fpustatus.f[0]) */
 #define FPU_CTX_F30_OFF (REGBYTES * 30) /* offsetof(fpu_context_t, fpustatus.f[30]) - offsetof(fpu_context_t, fpustatus.f[0]) */
 #define FPU_CTX_F31_OFF (REGBYTES * 31) /* offsetof(fpu_context_t, fpustatus.f[31]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#endif /* ENABLE_FPU */
-
-/**
- * ==================================
- * VECTOR EXTENSION
- * ==================================
- */
-
-#ifdef ENABLE_VECTOR
-
-#define VEC_FRAME_VSTART    (0 * REGBYTES)
-#define VEC_FRAME_VTYPE     (1 * REGBYTES)
-#define VEC_FRAME_VL        (2 * REGBYTES)
-#define VEC_FRAME_VCSR      (3 * REGBYTES)
-#define VEC_FRAME_V0        (4 * REGBYTES)
-
-.macro GET_VEC_FRAME_LEN, xreg
-    csrr    \xreg, vlenb
-    slli    \xreg, \xreg, 5
-    addi    \xreg, \xreg, 4 * REGBYTES
-.endm
-
-/**
- * @brief save vector extension hardware state
- *
- * @param dst register storing bottom of storage block
- *
- */
-.macro SAVE_VECTOR, dst
-    mv      t1, \dst
-
-    csrr    t0, vstart
-    STORE   t0, VEC_FRAME_VSTART(t1)
-    csrr    t0, vtype
-    STORE   t0, VEC_FRAME_VTYPE(t1)
-    csrr    t0, vl
-    STORE   t0, VEC_FRAME_VL(t1)
-    csrr    t0, vcsr
-    STORE   t0, VEC_FRAME_VCSR(t1)
-
-    addi    t1, t1, VEC_FRAME_V0
-
-    // config vector setting,
-    // t2 is updated to length of a vector group in bytes
-    VEC_CONFIG_SETVLI(t2, x0, VEC_IMM_SEW_8, VEC_IMM_LMUL_8)
-
-    vse8.v  v0, (t1)
-    add     t1, t1, t2
-    vse8.v  v8, (t1)
-    add     t1, t1, t2
-    vse8.v  v16, (t1)
-    add     t1, t1, t2
-    vse8.v  v24, (t1)
-.endm
-
-/**
- * @brief restore vector extension hardware states
- *
- * @param dst register storing bottom of storage block
- *
- */
-.macro RESTORE_VECTOR, dst
-    // restore vector registers first since it will modify vector states
-    mv      t0, \dst
-    addi    t1, t0, VEC_FRAME_V0
-
-    VEC_CONFIG_SETVLI(t2, x0, VEC_IMM_SEW_8, VEC_IMM_LMUL_8)
-
-    vle8.v  v0, (t1)
-    add     t1, t1, t2
-    vle8.v  v8, (t1)
-    add     t1, t1, t2
-    vle8.v  v16, (t1)
-    add     t1, t1, t2
-    vle8.v  v24, (t1)
-
-    mv      t1, t0
-
-    LOAD    t0, VEC_FRAME_VSTART(t1)
-    csrw    vstart, t0
-    LOAD    t0, VEC_FRAME_VCSR(t1)
-    csrw    vcsr, t0
-
-    LOAD    t0, VEC_FRAME_VTYPE(t1)
-    LOAD    t3, VEC_FRAME_VL(t1)
-    VEC_CONFIG_SET_VL_VTYPE(t3, t0)
-.endm
-
-#endif /* ENABLE_VECTOR */
+#endif /* ARCH_RISCV_FPU */
 
 #endif /* __ASSEMBLY__ */
 
+#ifdef ARCH_RISCV_VECTOR
+#include "rvv_context.h"
+#else /* !ARCH_RISCV_VECTOR */
+#define CTX_VECTOR_REG_NR  0
+#endif /* ARCH_RISCV_VECTOR */
+
 #endif /* __EXT_CONTEXT_H__ */

+ 4 - 1
libcpu/risc-v/virt64/interrupt_gcc.S

@@ -9,7 +9,7 @@
  * 2018/12/27     Jesven       Add SMP schedule
  * 2021/02/02     lizhirui     Add userspace support
  * 2021/12/24     JasonHu      Add user setting save/restore
- * 2022/10/22     WangXiaoyao  Support kernel mode RVV;
+ * 2022/10/22     Shell        Support kernel mode RVV;
  *                             Rewrite trap handling routine
  */
 
@@ -69,6 +69,9 @@ _context_switch:
     LOAD    a0, 0(t0)
     la      t0, rt_interrupt_to_thread
     LOAD    a1, 0(t0)
+    csrr    t0, sstatus
+    andi    t0, t0, ~SSTATUS_SPIE
+    csrw    sstatus, t0
     jal     rt_hw_context_switch
 
 _resume_execution:

+ 111 - 0
libcpu/risc-v/virt64/rvv_context.h

@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2006-2024, RT-Thread Development Team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2022-10-10     RT-Thread    the first version,
+ *                             compatible to riscv-v-spec-1.0
+ */
+#ifndef __RVV_CONTEXT_H__
+#define __RVV_CONTEXT_H__
+
+#include "cpuport.h"
+#include "encoding.h"
+
+#if defined(ARCH_VECTOR_VLEN_128)
+    #define CTX_VECTOR_REGS 64
+#elif defined(ARCH_VECTOR_VLEN_256)
+    #define CTX_VECTOR_REGS 128
+#else
+#error "No supported VLEN"
+#endif /* VLEN */
+
+#define CTX_VECTOR_REG_NR  (CTX_VECTOR_REGS + 4)
+
+/**
+ * ==================================
+ * VECTOR EXTENSION
+ * ==================================
+ */
+
+#define VEC_FRAME_VSTART    (0 * REGBYTES)
+#define VEC_FRAME_VTYPE     (1 * REGBYTES)
+#define VEC_FRAME_VL        (2 * REGBYTES)
+#define VEC_FRAME_VCSR      (3 * REGBYTES)
+#define VEC_FRAME_V0        (4 * REGBYTES)
+
+.macro GET_VEC_FRAME_LEN, xreg
+    csrr    \xreg, vlenb
+    slli    \xreg, \xreg, 5
+    addi    \xreg, \xreg, 4 * REGBYTES
+.endm
+
+/**
+ * @brief save vector extension hardware state
+ *
+ * @param dst register storing bottom of storage block
+ *
+ */
+.macro SAVE_VECTOR, dst
+    mv      t1, \dst
+
+    csrr    t0, vstart
+    STORE   t0, VEC_FRAME_VSTART(t1)
+    csrr    t0, vtype
+    STORE   t0, VEC_FRAME_VTYPE(t1)
+    csrr    t0, vl
+    STORE   t0, VEC_FRAME_VL(t1)
+    csrr    t0, vcsr
+    STORE   t0, VEC_FRAME_VCSR(t1)
+
+    addi    t1, t1, VEC_FRAME_V0
+
+    // config vector setting,
+    // t2 is updated to length of a vector group in bytes
+    VEC_CONFIG_SETVLI(t2, x0, VEC_IMM_SEW_8, VEC_IMM_LMUL_8)
+
+    vse8.v  v0, (t1)
+    add     t1, t1, t2
+    vse8.v  v8, (t1)
+    add     t1, t1, t2
+    vse8.v  v16, (t1)
+    add     t1, t1, t2
+    vse8.v  v24, (t1)
+.endm
+
+/**
+ * @brief restore vector extension hardware states
+ *
+ * @param dst register storing bottom of storage block
+ *
+ */
+.macro RESTORE_VECTOR, dst
+    // restore vector registers first since it will modify vector states
+    mv      t0, \dst
+    addi    t1, t0, VEC_FRAME_V0
+
+    VEC_CONFIG_SETVLI(t2, x0, VEC_IMM_SEW_8, VEC_IMM_LMUL_8)
+
+    vle8.v  v0, (t1)
+    add     t1, t1, t2
+    vle8.v  v8, (t1)
+    add     t1, t1, t2
+    vle8.v  v16, (t1)
+    add     t1, t1, t2
+    vle8.v  v24, (t1)
+
+    mv      t1, t0
+
+    LOAD    t0, VEC_FRAME_VSTART(t1)
+    csrw    vstart, t0
+    LOAD    t0, VEC_FRAME_VCSR(t1)
+    csrw    vcsr, t0
+
+    LOAD    t0, VEC_FRAME_VTYPE(t1)
+    LOAD    t3, VEC_FRAME_VL(t1)
+    VEC_CONFIG_SET_VL_VTYPE(t3, t0)
+.endm
+
+#endif /* __RVV_CONTEXT_H__ */

+ 16 - 5
libcpu/risc-v/virt64/stack.h

@@ -6,12 +6,23 @@
  * Change Logs:
  * Date           Author       Notes
  * 2021-01-30     lizhirui     first version
+ * 2021-11-18     JasonHu      add fpu member
+ * 2022-10-22     Shell        Support kernel mode RVV
  */
 
 #ifndef __STACK_H__
 #define __STACK_H__
 
+#include "stackframe.h"
+
 #include <rtthread.h>
+
+typedef struct rt_hw_switch_frame
+{
+    uint64_t regs[RT_HW_SWITCH_CONTEXT_SIZE];
+} *rt_hw_switch_frame_t;
+
+
 struct rt_hw_stack_frame
 {
     rt_ubase_t epc;        /* epc - epc    - program counter                     */
@@ -47,13 +58,13 @@ struct rt_hw_stack_frame
     rt_ubase_t t5;         /* x30 - t5     - temporary register 5                */
     rt_ubase_t t6;         /* x31 - t6     - temporary register 6                */
     rt_ubase_t user_sp_exc_stack;    /* sscratch - user mode sp/exception stack  */
-    rt_ubase_t __padding; /* align to 16bytes */
-#ifdef ENABLE_FPU
+    rt_ubase_t __padding;  /* align to 16bytes */
+#ifdef ARCH_RISCV_FPU
     rt_ubase_t f[CTX_FPU_REG_NR];      /* f0~f31 */
-#endif
-#ifdef ENABLE_VECTOR
+#endif /* ARCH_RISCV_FPU */
+#ifdef ARCH_RISCV_VECTOR
     rt_ubase_t v[CTX_VECTOR_REG_NR];
-#endif
+#endif /* ARCH_RISCV_VECTOR */
 };
 
 #endif

+ 51 - 14
libcpu/risc-v/virt64/stackframe.h

@@ -8,33 +8,70 @@
  * 2021-02-02     lizhirui     first version
  * 2021-02-11     lizhirui     fixed gp save/store bug
  * 2021-11-18     JasonHu      add fpu registers save/restore
- * 2022/10/22     WangXiaoyao  Support kernel mode RVV;
+ * 2022-10-22     Shell        Support kernel mode RVV
  */
 
 #ifndef __STACKFRAME_H__
 #define __STACKFRAME_H__
 
-#include "cpuport.h"
+#include <rtconfig.h>
 #include "encoding.h"
 #include "ext_context.h"
 
+/* bytes of register width */
+#ifdef ARCH_CPU_64BIT
+#define STORE                   sd
+#define LOAD                    ld
+#define FSTORE                  fsd
+#define FLOAD                   fld
+#define REGBYTES                8
+#else
+// error here, not portable
+#error "Not supported XLEN"
+#endif
+
+/* 33 general register + 1 padding */
+#define CTX_GENERAL_REG_NR  34
+
+/* all context registers */
+#define CTX_REG_NR  (CTX_GENERAL_REG_NR + CTX_FPU_REG_NR + CTX_VECTOR_REG_NR)
+
 #define BYTES(idx)          ((idx) * REGBYTES)
 #define FRAME_OFF_SSTATUS   BYTES(2)
 #define FRAME_OFF_SP        BYTES(32)
 #define FRAME_OFF_GP        BYTES(3)
 
+/* switch frame */
+#define RT_HW_SWITCH_CONTEXT_SSTATUS    0
+#define RT_HW_SWITCH_CONTEXT_S11        1
+#define RT_HW_SWITCH_CONTEXT_S10        2
+#define RT_HW_SWITCH_CONTEXT_S9         3
+#define RT_HW_SWITCH_CONTEXT_S8         4
+#define RT_HW_SWITCH_CONTEXT_S7         5
+#define RT_HW_SWITCH_CONTEXT_S6         6
+#define RT_HW_SWITCH_CONTEXT_S5         7
+#define RT_HW_SWITCH_CONTEXT_S4         8
+#define RT_HW_SWITCH_CONTEXT_S3         9
+#define RT_HW_SWITCH_CONTEXT_S2         10
+#define RT_HW_SWITCH_CONTEXT_S1         11
+#define RT_HW_SWITCH_CONTEXT_S0         12
+#define RT_HW_SWITCH_CONTEXT_RA         13
+#define RT_HW_SWITCH_CONTEXT_TP         14
+#define RT_HW_SWITCH_CONTEXT_ALIGNMENT  15  // Padding for alignment
+#define RT_HW_SWITCH_CONTEXT_SIZE       16  // Total size of the structure
+
 #ifdef __ASSEMBLY__
 
 .macro SAVE_ALL
 
-#ifdef ENABLE_FPU
+#ifdef ARCH_RISCV_FPU
     /* reserve float registers */
     addi sp, sp, -CTX_FPU_REG_NR * REGBYTES
-#endif /* ENABLE_FPU */
-#ifdef ENABLE_VECTOR
+#endif /* ARCH_RISCV_FPU */
+#ifdef ARCH_RISCV_VECTOR
     /* reserve vector registers */
     addi sp, sp, -CTX_VECTOR_REG_NR * REGBYTES
-#endif /* ENABLE_VECTOR */
+#endif /* ARCH_RISCV_VECTOR */
 
     /* save general registers */
     addi sp, sp, -CTX_GENERAL_REG_NR * REGBYTES
@@ -78,7 +115,7 @@
     csrr t0, sscratch
     STORE t0, 32 * REGBYTES(sp)
 
-#ifdef ENABLE_FPU
+#ifdef ARCH_RISCV_FPU
     /* backup sp and adjust sp to save float registers */
     mv t1, sp
     addi t1, t1, CTX_GENERAL_REG_NR * REGBYTES
@@ -125,9 +162,9 @@
     li t0, SSTATUS_FS_CLEAN
     csrs sstatus, t0
 
-#endif /* ENABLE_FPU */
+#endif /* ARCH_RISCV_FPU */
 
-#ifdef ENABLE_VECTOR
+#ifdef ARCH_RISCV_VECTOR
     csrr    t0, sstatus
     andi    t0, t0, SSTATUS_VS
     beqz    t0, 0f
@@ -137,7 +174,7 @@
 
     SAVE_VECTOR t1
 0:
-#endif /* ENABLE_VECTOR */
+#endif /* ARCH_RISCV_VECTOR */
 .endm
 
 /**
@@ -146,7 +183,7 @@
  */
 .macro RESTORE_ALL
 
-#ifdef ENABLE_VECTOR
+#ifdef ARCH_RISCV_VECTOR
     // skip on close
     ld      t0, 2 * REGBYTES(sp)
     // cannot use vector on initial
@@ -158,9 +195,9 @@
 
     RESTORE_VECTOR t1
 0:
-#endif /* ENABLE_VECTOR */
+#endif /* ARCH_RISCV_VECTOR */
 
-#ifdef ENABLE_FPU
+#ifdef ARCH_RISCV_FPU
     /* restore float register  */
     addi t2, sp, CTX_GENERAL_REG_NR * REGBYTES
 
@@ -206,7 +243,7 @@
     li t0, SSTATUS_FS_CLEAN
     csrs sstatus, t0
 
-#endif /* ENABLE_FPU */
+#endif /* ARCH_RISCV_FPU */
 
     /* restore general register */
     addi t0, sp, CTX_REG_NR * REGBYTES

+ 8 - 1
libcpu/risc-v/virt64/vector_encoding.h

@@ -12,8 +12,15 @@
 #ifndef __VECTOR_ENCODING_H__
 #define __VECTOR_ENCODING_H__
 
+/* mstatus/sstatus */
+#define MSTATUS_VS          0x00000600
+#define SSTATUS_VS          0x00000600 /* Vector Status */
+#define SSTATUS_VS_INITIAL  0x00000200
+#define SSTATUS_VS_CLEAN    0x00000400
+#define SSTATUS_VS_DIRTY    0x00000600
+
 /**
- * assembler names used for vset{i}vli vtypei immediate 
+ * assembler names used for vset{i}vli vtypei immediate
  */
 
 #define VEC_IMM_SEW_8      e8

+ 45 - 0
libcpu/risc-v/virt64/vector_gcc.S

@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2006-2024, RT-Thread Development Team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2018/10/28     Bernard      The unify RISC-V porting implementation
+ * 2018/12/27     Jesven       Add SMP support
+ * 2021/02/02     lizhirui     Add userspace support
+ * 2022/10/22     Shell        Support User mode RVV;
+ *                             Trimming process switch context
+ * 2024/09/01     Shell        Separated vector ctx from the generic
+ */
+
+#include "cpuport.h"
+#include "stackframe.h"
+
+/**
+ * @param a0 pointer to frame bottom
+ */
+.global rt_hw_vector_ctx_save
+rt_hw_vector_ctx_save:
+    SAVE_VECTOR a0
+    ret
+
+/**
+ * @param a0 pointer to frame bottom
+ */
+.global rt_hw_vector_ctx_restore
+rt_hw_vector_ctx_restore:
+    RESTORE_VECTOR a0
+    ret
+
+.global rt_hw_disable_vector
+rt_hw_disable_vector:
+    li t0, SSTATUS_VS
+    csrc sstatus, t0
+    ret
+
+.global rt_hw_enable_vector
+rt_hw_enable_vector:
+    li t0, SSTATUS_VS
+    csrs sstatus, t0
+    ret