
[riscv] add vector context in kernel;

wangxiaoyao 3 years ago
Parent
Commit
13d8e7e982

+ 4 - 0
bsp/qemu-virt64-riscv/Kconfig

@@ -35,6 +35,10 @@ config ENABLE_FPU
     bool "Enable FPU"
     default y
 
+config ENABLE_VECTOR
+    bool "Using RISC-V Vector Extension"
+    default n
+
 config RT_USING_USERSPACE_32BIT_LIMIT
     bool "Enable userspace 32bit limit"
     default n

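Once selected (typically via scons --menuconfig), the new symbol lands in the generated rtconfig.h as a plain C macro, which is exactly the ENABLE_VECTOR guard the rest of this commit relies on. A minimal, illustrative guard (VECTOR_CTX_WORDS is a made-up name, not part of the commit):

/* Illustrative only: rtconfig.h carries the Kconfig symbol as a bare macro. */
#include <rtconfig.h>

#ifdef ENABLE_VECTOR
#define VECTOR_CTX_WORDS  (64 + 4)   /* mirrors CTX_VECTOR_REG_NR in cpuport.h */
#else
#define VECTOR_CTX_WORDS  0
#endif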
+ 20 - 0
libcpu/risc-v/virt64/context.h

@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2006-2022, RT-Thread Development Team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2022-10-10     RT-Thread    the first version
+ */
+
+#ifndef __CONTEXT_H__
+#define __CONTEXT_H__
+
+void rt_hw_vector_ctx_save(void *buf);
+void rt_hw_vector_ctx_restore(void *buf);
+
+void rt_hw_disable_vector();
+void rt_hw_enable_vector();
+
+#endif /* __CONTEXT_H__ */

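The header only declares thin wrappers over the assembly below. A hedged usage sketch (demo_vector_ctx, the rt_malloc-based sizing, and the inline csrr are illustrative and assume a toolchain that accepts the vlenb CSR name): the buffer must hold the four vector CSR words plus all 32 registers, and the vector unit has to be on before either call.

#include <rtthread.h>
#include "context.h"

void demo_vector_ctx(void)
{
    rt_ubase_t vlenb, frame_len;
    void *buf;

    rt_hw_enable_vector();                        /* sstatus.VS must not be Off */
    __asm__ volatile ("csrr %0, vlenb" : "=r"(vlenb));

    /* 4 CSR words (vstart/vtype/vl/vcsr) + 32 vector registers */
    frame_len = 4 * sizeof(rt_ubase_t) + 32 * vlenb;
    buf = rt_malloc(frame_len);
    if (buf == RT_NULL)
        return;

    rt_hw_vector_ctx_save(buf);
    /* ... code that may clobber v0-v31 and the vector CSRs ... */
    rt_hw_vector_ctx_restore(buf);

    rt_free(buf);
}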
+ 49 - 10
libcpu/risc-v/virt64/context_gcc.S

@@ -32,23 +32,32 @@ rt_hw_context_switch_to:
 /*
  * void rt_hw_context_switch(rt_ubase_t from, rt_ubase_t to);
  *
- * a0 --> from
- * a1 --> to
+ * a0 --> from SP pointer
+ * a1 --> to SP pointer
  */
     .globl rt_hw_context_switch
 rt_hw_context_switch:
-    mv t2, sp
-    li t0, 0x120//set SPIE and SPP = 1
-    csrs sstatus, t0//if enter here,caller must be in system thread
-    csrw sepc, ra//return address
-    //saved from thread context
-    SAVE_ALL
+    // back up the stack top of the from-thread
+    csrw sscratch, sp
+
+    // set SPIE and SPP so that, on sret,
+    // 1. interrupts are re-enabled (SIE <- SPIE = 1)
+    // 2. execution stays in supervisor mode (SPP = 1)
+    // 3. all other status bits are preserved
+    li t0, 0x120
+    csrs sstatus, t0
 
-    STORE t2, 32 * REGBYTES(sp)//save user_sp
+    // simulate an exception return to supervisor mode: place ra in sepc
+    // so that SAVE_ALL records it as the from-thread's resume address
+    csrw sepc, ra
+
+    // NOTE: temporary registers are NOT guaranteed to keep
+    // their values across SAVE_ALL
+    SAVE_ALL
 
     STORE sp, (a0)
 
-    //restore to thread context
+    // restore to thread context
     LOAD sp, (a1)
 
     la s0, rt_current_thread
@@ -65,3 +74,33 @@ rt_hw_context_switch:
 
     RESTORE_ALL
     sret
+
+#ifdef ENABLE_VECTOR
+/**
+ * @param a0 pointer to frame bottom
+ */
+.global rt_hw_vector_ctx_save
+rt_hw_vector_ctx_save:
+    SAVE_VECTOR a0
+    ret
+
+/**
+ * @param a0 pointer to frame bottom
+ */
+.global rt_hw_vector_ctx_restore
+rt_hw_vector_ctx_restore:
+    RESTORE_VECTOR a0
+    ret
+
+.global rt_hw_disable_vector
+rt_hw_disable_vector:
+    li t0, SSTATUS_VS
+    csrc sstatus, t0
+    ret
+
+.global rt_hw_enable_vector
+rt_hw_enable_vector:
+    li t0, SSTATUS_VS
+    csrs sstatus, t0
+    ret
+#endif /* ENABLE_VECTOR */

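Both arguments of rt_hw_context_switch are addresses of the saved-SP fields in the two thread control blocks, which is how the RT-Thread scheduler normally invokes it. A conceptual C-level view (switch_threads is illustrative, not part of the commit):

#include <rtthread.h>

extern void rt_hw_context_switch(rt_ubase_t from_sp_addr, rt_ubase_t to_sp_addr);

/* Conceptual only: the from-thread's frame (including sepc = ra) is pushed on
 * its own stack, the resulting sp is stored through *from_sp_addr, and the
 * to-thread's sp is loaded from *to_sp_addr before RESTORE_ALL + sret. */
static void switch_threads(struct rt_thread *from, struct rt_thread *to)
{
    rt_hw_context_switch((rt_ubase_t)&from->sp, (rt_ubase_t)&to->sp);
}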
+ 10 - 1
libcpu/risc-v/virt64/cpuport.h

@@ -20,6 +20,7 @@
 #define REGBYTES                8
 #else
 // error here, not portable
+#error "Unsupported XLEN"
 #endif
 
 /* 33 general register */
@@ -32,8 +33,16 @@
 #define CTX_FPU_REG_NR  0
 #endif
 
+#ifdef ENABLE_VECTOR
+/* 32 vector registers: assuming VLEN = 128, that is 64 64-bit words, plus 4 words for the vector CSRs */
+/* TODO we should detect VLEN on the fly */
+#define CTX_VECTOR_REG_NR  (64 + 4)
+#else
+#define CTX_VECTOR_REG_NR  0
+#endif
+
 /* all context registers */
-#define CTX_REG_NR  (CTX_GENERAL_REG_NR + CTX_FPU_REG_NR)
+#define CTX_REG_NR  (CTX_GENERAL_REG_NR + CTX_FPU_REG_NR + CTX_VECTOR_REG_NR)
 
 #ifndef __ASSEMBLY__
 rt_inline void rt_hw_dsb()

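A quick sanity check of the new constant under the stated VLEN = 128 assumption (sketch only, not part of the commit):

/* 32 vector registers * 128 bit = 512 bytes = 64 doublewords,
 * plus 4 doublewords for vstart/vtype/vl/vcsr. */
#define ASSUMED_VLEN_BITS  128
#define VEC_REG_WORDS      (32 * ASSUMED_VLEN_BITS / 64)   /* 64 */
#define VEC_CSR_WORDS      4

_Static_assert(VEC_REG_WORDS + VEC_CSR_WORDS == 64 + 4,
               "CTX_VECTOR_REG_NR assumes VLEN = 128");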
+ 5 - 0
libcpu/risc-v/virt64/encoding.h

@@ -23,6 +23,7 @@
 #define MSTATUS_HPP         0x00000600
 #define MSTATUS_MPP         0x00001800
 #define MSTATUS_FS          0x00006000
+#define MSTATUS_VS          0x00000600
 #define MSTATUS_XS          0x00018000
 #define MSTATUS_MPRV        0x00020000
 #define MSTATUS_PUM         0x00040000
@@ -40,6 +41,10 @@
 #define SSTATUS_FS_INITIAL  0x00002000
 #define SSTATUS_FS_CLEAN    0x00004000
 #define SSTATUS_FS_DIRTY    0x00006000
+#define SSTATUS_VS          0x00000600 /* Vector Status */
+#define SSTATUS_VS_INITIAL  0x00000200
+#define SSTATUS_VS_CLEAN    0x00000400
+#define SSTATUS_VS_DIRTY    0x00000600
 #define SSTATUS_XS          0x00018000
 #define SSTATUS_PUM         0x00040000
 #define SSTATUS32_SD        0x80000000

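VS is the two-bit vector-status field at sstatus[10:9], so SSTATUS_VS doubles as its mask. Two hedged helpers show how the states are distinguished (illustrative, assuming encoding.h is on the include path):

#include "encoding.h"   /* SSTATUS_VS* from the hunk above */

static inline int vs_is_off(unsigned long sstatus)
{
    return (sstatus & SSTATUS_VS) == 0;                   /* V instructions trap */
}

static inline int vs_is_dirty(unsigned long sstatus)
{
    return (sstatus & SSTATUS_VS) == SSTATUS_VS_DIRTY;    /* state must be saved */
}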
+ 155 - 0
libcpu/risc-v/virt64/ext_context.h

@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2006-2022, RT-Thread Development Team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2022-10-10     RT-Thread    the first version,
+ *                             compatible to riscv-v-spec-1.0
+ */
+#ifndef __EXT_CONTEXT_H__
+#define __EXT_CONTEXT_H__
+
+#ifdef __ASSEMBLY__
+
+/**
+ * extension context maintenance
+ */
+
+#include "cpuport.h"
+#include "encoding.h"
+#include "vector_encoding.h"
+
+/**
+ * ==================================
+ * FPU EXTENSION
+ * ==================================
+ */
+
+#ifdef ENABLE_FPU
+#define FPU_CTX_F0_OFF   0   /* offsetof(fpu_context_t, fpustatus.f[0])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F1_OFF   8   /* offsetof(fpu_context_t, fpustatus.f[1])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F2_OFF   16  /* offsetof(fpu_context_t, fpustatus.f[2])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F3_OFF   24  /* offsetof(fpu_context_t, fpustatus.f[3])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F4_OFF   32  /* offsetof(fpu_context_t, fpustatus.f[4])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F5_OFF   40  /* offsetof(fpu_context_t, fpustatus.f[5])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F6_OFF   48  /* offsetof(fpu_context_t, fpustatus.f[6])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F7_OFF   56  /* offsetof(fpu_context_t, fpustatus.f[7])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F8_OFF   64  /* offsetof(fpu_context_t, fpustatus.f[8])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F9_OFF   72  /* offsetof(fpu_context_t, fpustatus.f[9])  - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F10_OFF  80  /* offsetof(fpu_context_t, fpustatus.f[10]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F11_OFF  88  /* offsetof(fpu_context_t, fpustatus.f[11]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F12_OFF  96  /* offsetof(fpu_context_t, fpustatus.f[12]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F13_OFF  104 /* offsetof(fpu_context_t, fpustatus.f[13]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F14_OFF  112 /* offsetof(fpu_context_t, fpustatus.f[14]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F15_OFF  120 /* offsetof(fpu_context_t, fpustatus.f[15]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F16_OFF  128 /* offsetof(fpu_context_t, fpustatus.f[16]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F17_OFF  136 /* offsetof(fpu_context_t, fpustatus.f[17]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F18_OFF  144 /* offsetof(fpu_context_t, fpustatus.f[18]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F19_OFF  152 /* offsetof(fpu_context_t, fpustatus.f[19]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F20_OFF  160 /* offsetof(fpu_context_t, fpustatus.f[20]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F21_OFF  168 /* offsetof(fpu_context_t, fpustatus.f[21]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F22_OFF  176 /* offsetof(fpu_context_t, fpustatus.f[22]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F23_OFF  184 /* offsetof(fpu_context_t, fpustatus.f[23]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F24_OFF  192 /* offsetof(fpu_context_t, fpustatus.f[24]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F25_OFF  200 /* offsetof(fpu_context_t, fpustatus.f[25]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F26_OFF  208 /* offsetof(fpu_context_t, fpustatus.f[26]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F27_OFF  216 /* offsetof(fpu_context_t, fpustatus.f[27]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F28_OFF  224 /* offsetof(fpu_context_t, fpustatus.f[28]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F29_OFF  232 /* offsetof(fpu_context_t, fpustatus.f[29]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F30_OFF  240 /* offsetof(fpu_context_t, fpustatus.f[30]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#define FPU_CTX_F31_OFF  248 /* offsetof(fpu_context_t, fpustatus.f[31]) - offsetof(fpu_context_t, fpustatus.f[0]) */
+#endif /* ENABLE_FPU */
+
+/**
+ * ==================================
+ * VECTOR EXTENSION
+ * ==================================
+ */
+
+#ifdef ENABLE_VECTOR
+
+#define VEC_FRAME_VSTART    (0 * REGBYTES)
+#define VEC_FRAME_VTYPE     (1 * REGBYTES)
+#define VEC_FRAME_VL        (2 * REGBYTES)
+#define VEC_FRAME_VCSR      (3 * REGBYTES)
+#define VEC_FRAME_V0        (4 * REGBYTES)
+
+.macro GET_VEC_FRAME_LEN, xreg
+    csrr    \xreg, vlenb
+    slli    \xreg, \xreg, 5
+    addi    \xreg, \xreg, 4 * REGBYTES
+.endm
+
+/**
+ * @brief save vector extension hardware state
+ * 
+ * @param dst register storing bottom of storage block
+ * 
+ */
+.macro SAVE_VECTOR, dst
+    mv      t1, \dst
+
+    csrr    t0, vstart
+    STORE   t0, VEC_FRAME_VSTART(t1)
+    csrr    t0, vtype
+    STORE   t0, VEC_FRAME_VTYPE(t1)
+    csrr    t0, vl
+    STORE   t0, VEC_FRAME_VL(t1)
+    csrr    t0, vcsr
+    STORE   t0, VEC_FRAME_VCSR(t1)
+
+    addi    t1, t1, VEC_FRAME_V0
+
+    // configure the vector unit:
+    // t2 is set to the length of one LMUL=8 register group in bytes
+    VEC_CONFIG_SETVLI(t2, x0, VEC_IMM_SEW_8, VEC_IMM_LMUL_8)
+
+    vse8.v  v0, (t1)
+    add     t1, t1, t2
+    vse8.v  v8, (t1)
+    add     t1, t1, t2
+    vse8.v  v16, (t1)
+    add     t1, t1, t2
+    vse8.v  v24, (t1)
+.endm
+
+/**
+ * @brief restore vector extension hardware states
+ * 
+ * @param dst register storing bottom of storage block
+ * 
+ */
+.macro RESTORE_VECTOR, dst
+    // restore the vector registers first, since the loads clobber the vector CSR state
+    mv      t0, \dst
+    addi    t1, t0, VEC_FRAME_V0
+
+    VEC_CONFIG_SETVLI(t2, x0, VEC_IMM_SEW_8, VEC_IMM_LMUL_8)
+
+    vle8.v  v0, (t1)
+    add     t1, t1, t2
+    vle8.v  v8, (t1)
+    add     t1, t1, t2
+    vle8.v  v16, (t1)
+    add     t1, t1, t2
+    vle8.v  v24, (t1)
+
+    mv      t1, t0
+
+    LOAD    t0, VEC_FRAME_VSTART(t1)
+    csrw    vstart, t0
+    LOAD    t0, VEC_FRAME_VCSR(t1)
+    csrw    vcsr, t0
+
+    LOAD    t0, VEC_FRAME_VTYPE(t1)
+    LOAD    t3, VEC_FRAME_VL(t1)
+    VEC_CONFIG_SET_VL_VTYPE(t3, t0)
+.endm
+
+#endif /* ENABLE_VECTOR */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __EXT_CONTEXT_H__ */

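GET_VEC_FRAME_LEN works in bytes: vlenb is VLEN/8, shifting it left by 5 accounts for all 32 registers, and the extra 4 * REGBYTES holds vstart/vtype/vl/vcsr. For VLEN = 128 that is (16 << 5) + 32 = 544 bytes. A C mirror of the computation, useful when sizing buffers from C (sketch only):

/* C rendering of GET_VEC_FRAME_LEN. SAVE_VECTOR then stores the registers as
 * four LMUL = 8 groups (v0/v8/v16/v24), each spanning 8 * vlenb bytes. */
static inline unsigned long vec_frame_len(unsigned long vlenb)
{
    return (vlenb << 5) + 4 * 8;   /* 32 registers + 4 CSR doublewords */
}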
+ 3 - 0
libcpu/risc-v/virt64/stack.h

@@ -50,6 +50,9 @@ struct rt_hw_stack_frame
 #ifdef ENABLE_FPU
     rt_ubase_t f[CTX_FPU_REG_NR];      /* f0~f31 */
 #endif
+#ifdef ENABLE_VECTOR
+    rt_ubase_t v[CTX_VECTOR_REG_NR];
+#endif
 };
 
 #endif

+ 30 - 37
libcpu/risc-v/virt64/stackframe.h

@@ -15,41 +15,7 @@
 
 #include "cpuport.h"
 #include "encoding.h"
-
-#ifdef ENABLE_FPU
-#define FPU_CTX_F0_OFF   0   /* offsetof(fpu_context_t, fpustatus.f[0])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F1_OFF   8   /* offsetof(fpu_context_t, fpustatus.f[1])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F2_OFF   16  /* offsetof(fpu_context_t, fpustatus.f[2])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F3_OFF   24  /* offsetof(fpu_context_t, fpustatus.f[3])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F4_OFF   32  /* offsetof(fpu_context_t, fpustatus.f[4])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F5_OFF   40  /* offsetof(fpu_context_t, fpustatus.f[5])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F6_OFF   48  /* offsetof(fpu_context_t, fpustatus.f[6])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F7_OFF   56  /* offsetof(fpu_context_t, fpustatus.f[7])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F8_OFF   64  /* offsetof(fpu_context_t, fpustatus.f[8])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F9_OFF   72  /* offsetof(fpu_context_t, fpustatus.f[9])  - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F10_OFF  80  /* offsetof(fpu_context_t, fpustatus.f[10]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F11_OFF  88  /* offsetof(fpu_context_t, fpustatus.f[11]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F12_OFF  96  /* offsetof(fpu_context_t, fpustatus.f[12]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F13_OFF  104 /* offsetof(fpu_context_t, fpustatus.f[13]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F14_OFF  112 /* offsetof(fpu_context_t, fpustatus.f[14]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F15_OFF  120 /* offsetof(fpu_context_t, fpustatus.f[15]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F16_OFF  128 /* offsetof(fpu_context_t, fpustatus.f[16]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F17_OFF  136 /* offsetof(fpu_context_t, fpustatus.f[17]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F18_OFF  144 /* offsetof(fpu_context_t, fpustatus.f[18]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F19_OFF  152 /* offsetof(fpu_context_t, fpustatus.f[19]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F20_OFF  160 /* offsetof(fpu_context_t, fpustatus.f[20]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F21_OFF  168 /* offsetof(fpu_context_t, fpustatus.f[21]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F22_OFF  176 /* offsetof(fpu_context_t, fpustatus.f[22]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F23_OFF  184 /* offsetof(fpu_context_t, fpustatus.f[23]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F24_OFF  192 /* offsetof(fpu_context_t, fpustatus.f[24]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F25_OFF  200 /* offsetof(fpu_context_t, fpustatus.f[25]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F26_OFF  208 /* offsetof(fpu_context_t, fpustatus.f[26]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F27_OFF  216 /* offsetof(fpu_context_t, fpustatus.f[27]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F28_OFF  224 /* offsetof(fpu_context_t, fpustatus.f[28]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F29_OFF  232 /* offsetof(fpu_context_t, fpustatus.f[29]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F30_OFF  240 /* offsetof(fpu_context_t, fpustatus.f[30]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#define FPU_CTX_F31_OFF  248 /* offsetof(fpu_context_t, fpustatus.f[31]) - offsetof(fpu_context_t, fpustatus.f[0]) */
-#endif /* ENABLE_FPU */
+#include "ext_context.h"
 
 /**
  * The register `tp` always save/restore when context switch,
@@ -65,6 +31,10 @@
     /* reserve float registers */
     addi sp, sp, -CTX_FPU_REG_NR * REGBYTES
 #endif /* ENABLE_FPU */
+#ifdef ENABLE_VECTOR
+    /* reserve vector registers */
+    addi sp, sp, -CTX_VECTOR_REG_NR * REGBYTES
+#endif /* ENABLE_VECTOR */
 
     /* save general registers */
     addi sp, sp, -CTX_GENERAL_REG_NR * REGBYTES
@@ -157,14 +127,37 @@
 
 #endif /* ENABLE_FPU */
 
+#ifdef ENABLE_VECTOR
+    csrr    t0, sstatus
+    andi    t0, t0, SSTATUS_VS
+    beqz    t0, 0f
+
+    /* locate the vector frame and save into it */
+    addi t1, sp, (CTX_GENERAL_REG_NR + CTX_FPU_REG_NR) * REGBYTES
+
+    SAVE_VECTOR t1
+0:
+#endif /* ENABLE_VECTOR */
 .endm
 
 .macro RESTORE_ALL
 
+#ifdef ENABLE_VECTOR
+    // skip if the vector unit is off (VS == Off)
+    csrr    t0, sstatus
+    andi    t0, t0, SSTATUS_VS
+    beqz    t0, 0f
+
+    /* locate the vector frame and restore from it */
+    addi t1, sp, (CTX_GENERAL_REG_NR + CTX_FPU_REG_NR) * REGBYTES
+
+    RESTORE_VECTOR t1
+0:
+#endif /* ENABLE_VECTOR */
+
 #ifdef ENABLE_FPU
     /* restore float register  */
-    mv t2, sp
-    addi t2, t2, CTX_GENERAL_REG_NR * REGBYTES   /* skip all normal reg */
+    addi t2, sp, CTX_GENERAL_REG_NR * REGBYTES
 
     li  t0, SSTATUS_FS
     csrs sstatus, t0

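The sstatus.VS guard around both macros makes the vector frame lazy: a thread that never turns the vector unit on skips the copy entirely. Rendered in C, the decision on the save path looks roughly like this (sketch; assumes ENABLE_VECTOR and the headers added in this commit):

#include "encoding.h"   /* SSTATUS_VS */
#include "stack.h"      /* struct rt_hw_stack_frame */
#include "context.h"    /* rt_hw_vector_ctx_save() */

static void save_vector_lazily(struct rt_hw_stack_frame *frame)
{
    if ((frame->sstatus & SSTATUS_VS) == 0)
        return;                         /* vector unit never enabled: nothing to do */

    rt_hw_vector_ctx_save(frame->v);    /* frame->v sits after the general/FPU area */
}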
+ 40 - 0
libcpu/risc-v/virt64/trap.c

@@ -172,6 +172,41 @@ void handle_user(rt_size_t scause, rt_size_t stval, rt_size_t sepc, struct rt_hw
     sys_exit(-1);
 }
 
+static void vector_enable(struct rt_hw_stack_frame *sp)
+{
+    sp->sstatus |= SSTATUS_VS_INITIAL;
+}
+
+/** 
+ * detect first use of the vector/FP unit; V and F/D instructions are not distinguished here
+ */
+static int illegal_inst_recoverable(rt_ubase_t stval, struct rt_hw_stack_frame *sp)
+{
+    // the low 7 bits of the faulting instruction are the major opcode
+    int opcode = stval & 0x7f;
+    int csr = (stval & 0xFFF00000) >> 20;
+    // ref riscv-v-spec-1.0, [Vector Instruction Formats]
+    int width = ((stval & 0x7000) >> 12) - 1;
+    int flag = 0;
+
+    switch (opcode)
+    {
+    case 0x57: // V
+    case 0x27: // scalar FLOAT
+    case 0x07:
+    case 0x73: // CSR
+        flag = 1;
+        break;
+    }
+
+    if (flag)
+    {
+        vector_enable(sp);
+    }
+
+    return flag;
+}
+
 /* Trap entry */
 void handle_trap(rt_size_t scause, rt_size_t stval, rt_size_t sepc, struct rt_hw_stack_frame *sp)
 {
@@ -213,6 +248,11 @@ void handle_trap(rt_size_t scause, rt_size_t stval, rt_size_t sepc, struct rt_hw
         }
         else
         {
+            if (scause == 0x2)
+            {
+                if (!(sp->sstatus & SSTATUS_VS) && illegal_inst_recoverable(stval, sp))
+                    return;
+            }
             if (!(sp->sstatus & 0x100))
             {
                 handle_user(scause, stval, sepc, sp);

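The first vector (or FP) instruction a thread executes traps as an illegal instruction because its saved VS field is still Off; the handler recognizes the opcode, flips VS to Initial in the saved sstatus, and returns so the instruction is retried with the unit enabled. A small host-side check of the opcode test (the stval value is an assumed vsetvli encoding, shown for illustration only):

#include <stdio.h>

static int is_recoverable_opcode(unsigned int inst)
{
    switch (inst & 0x7f)               /* low 7 bits: major opcode */
    {
    case 0x57:  /* OP-V: vector arithmetic and vset{i}vl{i} */
    case 0x27:  /* STORE-FP, also used by vector stores */
    case 0x07:  /* LOAD-FP, also used by vector loads */
    case 0x73:  /* SYSTEM: CSR access (vstart, vcsr, ...) */
        return 1;
    }
    return 0;
}

int main(void)
{
    unsigned int stval = 0x0c3572d7;   /* assumed: vsetvli t0, a0, e8, m8, ta, ma */
    printf("recoverable: %d\n", is_recoverable_opcode(stval));
    return 0;
}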
+ 44 - 0
libcpu/risc-v/virt64/vector_encoding.h

@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2006-2022, RT-Thread Development Team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2022-10-10     RT-Thread    the first version,
+ *                             compatible to riscv-v-spec-1.0
+ */
+
+#ifndef __VECTOR_ENCODING_H__
+#define __VECTOR_ENCODING_H__
+
+/**
+ * assembler names used for vset{i}vli vtypei immediate 
+ */
+
+#define VEC_IMM_SEW_8      e8
+#define VEC_IMM_SEW_16     e16
+#define VEC_IMM_SEW_32     e32
+#define VEC_IMM_SEW_64     e64
+/* group setting, encoding by multiplier */
+#define VEC_IMM_LMUL_F8     mf8
+#define VEC_IMM_LMUL_F4     mf4
+#define VEC_IMM_LMUL_F2     mf2
+#define VEC_IMM_LMUL_1      m1
+#define VEC_IMM_LMUL_2      m2
+#define VEC_IMM_LMUL_4      m4
+#define VEC_IMM_LMUL_8      m8
+/* TAIL & MASK agnostic bits */
+#define VEC_IMM_TAIL_AGNOSTIC   ta
+#define VEC_IMM_MASK_AGNOSTIC   ma
+#define VEC_IMM_TAMA            VEC_IMM_TAIL_AGNOSTIC, VEC_IMM_MASK_AGNOSTIC
+#define VEC_IMM_TAMU            VEC_IMM_TAIL_AGNOSTIC
+#define VEC_IMM_TUMA            VEC_IMM_MASK_AGNOSTIC
+
+/**
+ * configuration setting instruction
+ */
+#define VEC_CONFIG_SETVLI(xVl, xAvl, vtype...)   vsetvli xVl, xAvl, ##vtype
+#define VEC_CONFIG_SET_VL_VTYPE(xVl, xVtype)   vsetvl x0, xVl, xVtype
+
+#endif /* __VECTOR_ENCODING_H__ */
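VEC_CONFIG_SETVLI(t2, x0, VEC_IMM_SEW_8, VEC_IMM_LMUL_8) expands to vsetvli t2, x0, e8, m8, i.e. request VLMAX byte elements over a whole eight-register group. The same idiom from C, assuming a toolchain built for rv64gcv (illustrative; vec_group_bytes is not part of the commit):

/* Returns the byte length of one LMUL = 8 register group; note that the
 * vsetvli also rewrites the live vl/vtype CSRs as a side effect. */
static inline unsigned long vec_group_bytes(void)
{
    unsigned long vl;
    __asm__ volatile ("vsetvli %0, x0, e8, m8, ta, ma" : "=r"(vl));
    return vl;   /* 8 * VLEN / 8, e.g. 128 bytes when VLEN = 128 */
}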