Browse Source

[libcpu/riscv] add cache coherence op

wangxiaoyao 3 years ago
parent
commit
a9fcc899b1

+ 1 - 1
bsp/allwinner/libraries/libos/src/os.c

@@ -142,7 +142,7 @@ void awos_arch_mems_clean_dcache_region(unsigned long start, unsigned long len)
 
 void awos_arch_mems_clean_flush_dcache_region(unsigned long start, unsigned long len)
 {
-    rt_hw_cpu_dcache_clean_flush((void *)start, len);
+    rt_hw_cpu_dcache_clean_invalidate((void *)start, len);
 }
 
 void awos_arch_mems_flush_dcache_region(unsigned long start, unsigned long len)

+ 71 - 66
libcpu/risc-v/t-head/c906/cache.c

@@ -7,6 +7,9 @@
  * Date           Author       Notes
  * 2021-01-29     lizhirui     first version
  * 2021-11-05     JasonHu      add c906 cache inst
+ * 2022-11-09     WangXiaoyao  Support cache coherence operations;
+ *                             improve portability and make
+ *                             no assumptions about undefined behavior
  */
 
 #include <rthw.h>
@@ -14,6 +17,9 @@
 #include <board.h>
 #include <riscv.h>
 
+#include "opcode.h"
+#include "cache.h"
+
 #define L1_CACHE_BYTES (64)
 
 /**
@@ -25,60 +31,35 @@ static void dcache_inv_range(unsigned long start, unsigned long end) __attribute
 static void dcache_wbinv_range(unsigned long start, unsigned long end) __attribute__((optimize("O0")));
 static void icache_inv_range(unsigned long start, unsigned long end) __attribute__((optimize("O0")));
 
+#define CACHE_OP_RS1 %0
+#define CACHE_OP_RANGE(instr)                                  \
+    {                                                          \
+        register rt_ubase_t i = start & ~(L1_CACHE_BYTES - 1); \
+        for (; i < end; i += L1_CACHE_BYTES)                   \
+        {                                                      \
+            __asm__ volatile(instr ::"r"(i)                    \
+                             : "memory");                      \
+        }                                                      \
+    }
+
 static void dcache_wb_range(unsigned long start, unsigned long end)
 {
-    unsigned long i = start & ~(L1_CACHE_BYTES - 1);
-
-    for (; i < end; i += L1_CACHE_BYTES)
-    {
-        /* asm volatile("dcache.cva %0\n"::"r"(i):"memory"); */
-        /*
-         * compiler always use a5 = i.
-         * a6 not used, so we use a6 here.
-         */
-        asm volatile("mv a6, %0\n"::"r"(i):"memory");   /* a6 = a5(i) */
-        asm volatile(".long 0x0257800b");               /* dcache.cva a6 */
-    }
-    asm volatile(".long 0x01b0000b");   /* sync.is */
+    CACHE_OP_RANGE(OPC_DCACHE_CVA(CACHE_OP_RS1));
 }
 
 static void dcache_inv_range(unsigned long start, unsigned long end)
 {
-    unsigned long i = start & ~(L1_CACHE_BYTES - 1);
-
-    for (; i < end; i += L1_CACHE_BYTES)
-    {
-        /* asm volatile("dcache.iva %0\n"::"r"(i):"memory"); */
-        asm volatile("mv a6, %0\n"::"r"(i):"memory");   /* a6 = a5(i) */
-        asm volatile(".long 0x0268000b");               /* dcache.iva a6 */
-    }
-    asm volatile(".long 0x01b0000b");
+    CACHE_OP_RANGE(OPC_DCACHE_IVA(CACHE_OP_RS1));
 }
 
 static void dcache_wbinv_range(unsigned long start, unsigned long end)
 {
-    unsigned long i = start & ~(L1_CACHE_BYTES - 1);
-
-    for (; i < end; i += L1_CACHE_BYTES)
-    {
-        /* asm volatile("dcache.civa %0\n"::"r"(i):"memory"); */
-        asm volatile("mv a6, %0\n"::"r"(i):"memory");   /* a6 = a5(i) */
-        asm volatile(".long 0x0278000b");               /* dcache.civa a6 */
-    }
-    asm volatile(".long 0x01b0000b");
+    CACHE_OP_RANGE(OPC_DCACHE_CIVA(CACHE_OP_RS1));
 }
 
 static void icache_inv_range(unsigned long start, unsigned long end)
 {
-    unsigned long i = start & ~(L1_CACHE_BYTES - 1);
-
-    for (; i < end; i += L1_CACHE_BYTES)
-    {
-        /* asm volatile("icache.iva %0\n"::"r"(i):"memory"); */
-        asm volatile("mv a6, %0\n"::"r"(i):"memory");   /* a6 = a5(i) */
-        asm volatile(".long 0x0308000b");               /* icache.iva a6 */
-    }
-    asm volatile(".long 0x01b0000b");
+    CACHE_OP_RANGE(OPC_ICACHE_IVA(CACHE_OP_RS1));
 }
 
 rt_inline rt_uint32_t rt_cpu_icache_line_size(void)
@@ -91,65 +72,89 @@ rt_inline rt_uint32_t rt_cpu_dcache_line_size(void)
     return L1_CACHE_BYTES;
 }
 
-void rt_hw_cpu_icache_invalidate(void *addr,int size)
+void rt_hw_cpu_icache_invalidate_local(void *addr, int size)
 {
     icache_inv_range((unsigned long)addr, (unsigned long)((unsigned char *)addr + size));
+    rt_hw_cpu_sync_i();
 }
 
-void rt_hw_cpu_dcache_invalidate(void *addr,int size)
+void rt_hw_cpu_dcache_invalidate_local(void *addr, int size)
 {
     dcache_inv_range((unsigned long)addr, (unsigned long)((unsigned char *)addr + size));
+    rt_hw_cpu_sync();
 }
 
-void rt_hw_cpu_dcache_clean(void *addr,int size)
+void rt_hw_cpu_dcache_clean_local(void *addr, int size)
 {
     dcache_wb_range((unsigned long)addr, (unsigned long)((unsigned char *)addr + size));
+    rt_hw_cpu_sync();
 }
 
-void rt_hw_cpu_dcache_clean_flush(void *addr,int size)
+void rt_hw_cpu_dcache_clean_invalidate_local(void *addr, int size)
 {
     dcache_wbinv_range((unsigned long)addr, (unsigned long)((unsigned char *)addr + size));
+    rt_hw_cpu_sync();
 }
 
-void rt_hw_cpu_icache_ops(int ops,void *addr,int size)
+/**
+ * =====================================================
+ * Architecture Independent API
+ * =====================================================
+ */
+
+void rt_hw_cpu_icache_ops(int ops, void *addr, int size)
 {
-    if(ops == RT_HW_CACHE_INVALIDATE)
+    if (ops == RT_HW_CACHE_INVALIDATE)
     {
-        rt_hw_cpu_icache_invalidate(addr, size);
+        rt_hw_cpu_icache_invalidate_local(addr, size);
     }
 }
 
-void rt_hw_cpu_dcache_ops(int ops,void *addr,int size)
+void rt_hw_cpu_dcache_ops(int ops, void *addr, int size)
 {
-    if(ops == RT_HW_CACHE_FLUSH)
+    if (ops == RT_HW_CACHE_FLUSH)
     {
-        rt_hw_cpu_dcache_clean(addr, size);
+        rt_hw_cpu_dcache_clean_local(addr, size);
     }
     else
     {
-        rt_hw_cpu_dcache_invalidate(addr, size);
+        rt_hw_cpu_dcache_invalidate_local(addr, size);
     }
 }
 
-void rt_hw_cpu_dcache_clean_all(void)
-{
-    /* asm volatile("dcache.call\n":::"memory"); */
-    asm volatile(".long 0x0010000b\n":::"memory");
-}
-
-void rt_hw_cpu_dcache_invalidate_all(void)
+void rt_hw_sync_cache_local(void *addr, int size)
 {
-    /* asm volatile("dcache.ciall\n":::"memory"); */
-    asm volatile(".long 0x0030000b\n":::"memory");
+    rt_hw_cpu_dcache_clean_local(addr, size);
+    rt_hw_cpu_icache_invalidate_local(addr, size);
 }
 
-void rt_hw_cpu_icache_invalidate_all(void)
-{
-    /* asm volatile("icache.iall\n":::"memory"); */
-    asm volatile(".long 0x0100000b\n":::"memory");
-}
+#ifdef RT_USING_LWP
+#include <lwp_arch.h>
+#define ICACHE (1 << 0)
+#define DCACHE (1 << 1)
+#define BCACHE (ICACHE | DCACHE)
 
+/**
+ * TODO: move this syscall handling into the kernel
+ */
 int sys_cacheflush(void *addr, int size, int cache)
 {
-    return 0;
+    /* must in user space */
+    if ((size_t)addr >= USER_VADDR_START && (size_t)addr + size < USER_VADDR_TOP)
+    {
+        /**
+         * we DO NOT check argument 'cache' invalid error
+         */
+        if ((cache & DCACHE) != 0)
+        {
+            rt_hw_cpu_dcache_clean_invalidate_local(addr, size);
+        }
+        if ((cache & ICACHE) != 0)
+        {
+            rt_hw_cpu_icache_invalidate_local(addr, size);
+        }
+        return 0;
+    }
+    return -RT_ERROR;
 }
+#endif

+ 87 - 4
libcpu/risc-v/t-head/c906/cache.h

@@ -11,13 +11,96 @@
 #ifndef CACHE_H__
 #define CACHE_H__
 
-void rt_hw_cpu_dcache_clean(void *addr,int size);
-void rt_hw_cpu_icache_invalidate(void *addr,int size);
-void rt_hw_cpu_dcache_invalidate(void *addr,int size);
+#include "opcode.h"
+
+#ifndef ALWAYS_INLINE
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#endif
+
+#define rt_hw_cpu_sync() __asm__ volatile(OPC_SYNC:: \
+                                              : "memory")
+
+#define rt_hw_cpu_sync_i() __asm__ volatile(OPC_SYNC_I:: \
+                                                : "memory");
+
+/**
+ * ========================================
+ * Local cpu cache maintenance operations
+ * ========================================
+ */
+
+void rt_hw_cpu_dcache_clean_local(void *addr, int size);
+void rt_hw_cpu_dcache_invalidate_local(void *addr, int size);
+void rt_hw_cpu_dcache_clean_invalidate_local(void *addr, int size);
+
+void rt_hw_cpu_icache_invalidate_local(void *addr, int size);
+
+ALWAYS_INLINE void rt_hw_cpu_dcache_clean_all_local(void)
+{
+    __asm__ volatile(OPC_DCACHE_CALL ::
+                         : "memory");
+    rt_hw_cpu_sync();
+}
+
+ALWAYS_INLINE void rt_hw_cpu_dcache_invalidate_all_local(void)
+{
+    __asm__ volatile(OPC_DCACHE_IALL ::
+                         : "memory");
+    rt_hw_cpu_sync();
+}
+
+ALWAYS_INLINE void rt_hw_cpu_dcache_clean_invalidate_all_local(void)
+{
+    __asm__ volatile(OPC_DCACHE_CIALL ::
+                         : "memory");
+    rt_hw_cpu_sync();
+}
+
+ALWAYS_INLINE void rt_hw_cpu_icache_invalidate_all_local(void)
+{
+    __asm__ volatile(OPC_ICACHE_IALL ::
+                         : "memory");
+    rt_hw_cpu_sync_i();
+}
+
+/**
+ * ========================================
+ * Multi-core cache maintenance operations
+ * ========================================
+ */
+
+#ifdef RT_USING_SMP
+#error "TODO: cache maintenance has not been ported to RISC-V SMP yet"
+
+void rt_hw_cpu_dcache_clean(void *addr, int size);
+void rt_hw_cpu_dcache_invalidate(void *addr, int size);
+void rt_hw_cpu_dcache_clean_invalidate(void *addr, int size);
 
-void rt_hw_cpu_dcache_clean_flush(void *addr,int size);
 void rt_hw_cpu_dcache_clean_all(void);
 void rt_hw_cpu_dcache_invalidate_all(void);
+void rt_hw_cpu_dcache_clean_invalidate_all(void);
+
+void rt_hw_cpu_icache_invalidate(void *addr, int size);
 void rt_hw_cpu_icache_invalidate_all(void);
 
+#else /* !RT_USING_SMP */
+
+#define rt_hw_cpu_dcache_clean rt_hw_cpu_dcache_clean_local
+#define rt_hw_cpu_dcache_invalidate rt_hw_cpu_dcache_invalidate_local
+#define rt_hw_cpu_dcache_clean_invalidate rt_hw_cpu_dcache_clean_invalidate_local
+
+#define rt_hw_cpu_dcache_clean_all rt_hw_cpu_dcache_clean_all_local
+#define rt_hw_cpu_dcache_invalidate_all rt_hw_cpu_dcache_invalidate_all_local
+#define rt_hw_cpu_dcache_clean_invalidate_all rt_hw_cpu_dcache_clean_invalidate_all_local
+
+#define rt_hw_cpu_icache_invalidate rt_hw_cpu_icache_invalidate_local
+#define rt_hw_cpu_icache_invalidate_all rt_hw_cpu_icache_invalidate_all_local
+
+#endif /* RT_USING_SMP */
+
+/**
+ * @brief Synchronize cache to Point of Coherent
+ */
+void rt_hw_sync_cache_local(void *addr, int size);
+
 #endif /* CACHE_H__ */

+ 79 - 0
libcpu/risc-v/t-head/c906/opcode.h

@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2006-2021, RT-Thread Development Team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2022-11-09     WangXiaoyao  Add portable asm support
+ */
+#ifndef __OPCODE_H__
+#define __OPCODE_H__
+
+/**
+ * @brief binary opcode pseudo operations
+ * Used to bypass toolchain restriction on extension ISA
+ * 
+ * WARNING: Xuantie ISAs are not opcode-compatible with each other.
+ * Porting this file is painful; be really careful.
+ */
+
+#define ___TOSTR(str) #str
+#define __TOSTR(str) ___TOSTR(str)
+#define _TOSTR(str) __TOSTR(str)
+
+/**
+ * @brief RISC-V instruction formats
+ */
+
+/** 
+ * R type: .insn r opcode6, func3, func7, rd, rs1, rs2
+ * 
+ * +-------+-----+-----+-------+----+---------+
+ * | func7 | rs2 | rs1 | func3 | rd | opcode6 |
+ * +-------+-----+-----+-------+----+---------+
+ * 31      25    20    15      12   7        0
+ */
+#define __OPC_INSN_FORMAT_R(opcode, func3, func7, rd, rs1, rs2) \
+    ".insn r "_TOSTR(opcode)","_TOSTR(func3)","_TOSTR(func7)","_TOSTR(rd)","_TOSTR(rs1)","_TOSTR(rs2)
+
+/**
+ * @brief Xuantie T-HEAD extension ISA format
+ * Compatible to Xuantie C906R2S1 user manual v06
+ */
+#define __OPC_INSN_FORMAT_CACHE(func7, rs2, rs1) \
+    __OPC_INSN_FORMAT_R(0x0b, 0x0, func7, x0, rs1, rs2)
+
+#ifdef _TOOLCHAIN_SUPP_XTHEADE_ISA_
+#define OPC_SYNC                "sync"
+#define OPC_SYNC_I              "sync.i"
+
+#define OPC_DCACHE_CALL         "dcache.call"
+#define OPC_DCACHE_IALL         "dcache.iall"
+#define OPC_DCACHE_CIALL        "dcache.ciall"
+
+#define OPC_ICACHE_IALL         "icache.iall"
+
+#define OPC_DCACHE_CVA(rs1)     "dcache.cva "_TOSTR(rs1)
+#define OPC_DCACHE_IVA(rs1)     "dcache.iva "_TOSTR(rs1)
+#define OPC_DCACHE_CIVA(rs1)    "dcache.civa "_TOSTR(rs1)
+
+#define OPC_ICACHE_IVA(rs1)     "icache.iva "_TOSTR(rs1)
+#else /* !_TOOLCHAIN_SUPP_XTHEADE_ISA_ */
+#define OPC_SYNC                ".long 0x0180000B"
+#define OPC_SYNC_I              ".long 0x01A0000B"
+
+#define OPC_DCACHE_CALL         ".long 0x0010000B"
+#define OPC_DCACHE_IALL         ".long 0x0020000B"
+#define OPC_DCACHE_CIALL        ".long 0x0030000B"
+
+#define OPC_ICACHE_IALL         ".long 0x0100000B"
+
+#define OPC_DCACHE_CVA(rs1)     __OPC_INSN_FORMAT_CACHE(0x1, x4, rs1)
+#define OPC_DCACHE_IVA(rs1)     __OPC_INSN_FORMAT_CACHE(0x1, x6, rs1)
+#define OPC_DCACHE_CIVA(rs1)    __OPC_INSN_FORMAT_CACHE(0x1, x7, rs1)
+
+#define OPC_ICACHE_IVA(rs1)     __OPC_INSN_FORMAT_CACHE(0x1, x16, rs1)
+#endif /* _TOOLCHAIN_SUPP_XTHEADE_ISA_ */
+
+#endif /* __OPCODE_H__ */

+ 12 - 32
libcpu/risc-v/virt64/cache.c

@@ -12,6 +12,7 @@
 #include <rtdef.h>
 #include <board.h>
 #include <riscv.h>
+#include <cache.h>
 
 rt_inline rt_uint32_t rt_cpu_icache_line_size()
 {
@@ -23,59 +24,38 @@ rt_inline rt_uint32_t rt_cpu_dcache_line_size()
     return 0;
 }
 
-void rt_hw_cpu_icache_invalidate(void *addr,int size)
+void rt_hw_cpu_icache_ops(int ops, void *addr, int size)
 {
-
-}
-
-void rt_hw_cpu_dcache_invalidate(void *addr,int size)
-{
-
-}
-
-void rt_hw_cpu_dcache_clean(void *addr,int size)
-{
-
-}
-
-void rt_hw_cpu_icache_ops(int ops,void *addr,int size)
-{
-    if(ops == RT_HW_CACHE_INVALIDATE)
+    if (ops == RT_HW_CACHE_INVALIDATE)
     {
-        rt_hw_cpu_icache_invalidate(addr,size);
+        rt_hw_cpu_icache_invalidate(addr, size);
     }
 }
 
-void rt_hw_cpu_dcache_ops(int ops,void *addr,int size)
+void rt_hw_cpu_dcache_ops(int ops, void *addr, int size)
 {
-    if(ops == RT_HW_CACHE_FLUSH)
+    if (ops == RT_HW_CACHE_FLUSH)
     {
-        rt_hw_cpu_dcache_clean(addr,size);
+        rt_hw_cpu_dcache_clean(addr, size);
     }
     else
     {
-        rt_hw_cpu_dcache_invalidate(addr,size);
+        rt_hw_cpu_dcache_invalidate(addr, size);
     }
 }
 
-void rt_hw_cpu_dcache_flush_all()
-{
-
-}
-
-void rt_hw_cpu_icache_invalidate_all()
+rt_base_t rt_hw_cpu_icache_status_local()
 {
-
+    return 0;
 }
 
-rt_base_t rt_hw_cpu_icache_status()
+rt_base_t rt_hw_cpu_dcache_status()
 {
     return 0;
 }
 
-rt_base_t rt_hw_cpu_dcache_status()
+void rt_hw_sync_cache_local(void *addr, int size)
 {
-    return 0;
 }
 
 int sys_cacheflush(void *addr, int size, int cache)

+ 59 - 0
libcpu/risc-v/virt64/cache.h

@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2006-2021, RT-Thread Development Team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2022-11-09     RT-Thread    The first version
+ */
+#ifndef __CACHE_H__
+#define __CACHE_H__
+
+#ifndef ALWAYS_INLINE
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#endif
+
+/**
+ * @brief These APIs may not be supported by a given architecture,
+ * but we provide them in all cases to keep the interface 'general purpose'
+ */
+
+ALWAYS_INLINE void rt_hw_cpu_dcache_clean_local(void *addr, int size) {}
+ALWAYS_INLINE void rt_hw_cpu_dcache_invalidate_local(void *addr, int size) {}
+ALWAYS_INLINE void rt_hw_cpu_dcache_clean_invalidate_local(void *addr, int size) {}
+
+ALWAYS_INLINE void rt_hw_cpu_dcache_clean_all_local() {}
+ALWAYS_INLINE void rt_hw_cpu_dcache_invalidate_all_local(void) {}
+ALWAYS_INLINE void rt_hw_cpu_dcache_clean_invalidate_all_local(void) {}
+
+ALWAYS_INLINE void rt_hw_cpu_icache_invalidate_local(void *addr, int size) {}
+ALWAYS_INLINE void rt_hw_cpu_icache_invalidate_all_local() {}
+
+/**
+ * @brief Multi-core
+ */
+
+#define rt_hw_cpu_dcache_clean rt_hw_cpu_dcache_clean_local
+#define rt_hw_cpu_dcache_invalidate rt_hw_cpu_dcache_invalidate_local
+#define rt_hw_cpu_dcache_clean_invalidate rt_hw_cpu_dcache_clean_invalidate_local
+
+#define rt_hw_cpu_dcache_clean_all rt_hw_cpu_dcache_clean_all_local
+#define rt_hw_cpu_dcache_invalidate_all rt_hw_cpu_dcache_invalidate_all_local
+#define rt_hw_cpu_dcache_clean_invalidate_all rt_hw_cpu_dcache_clean_invalidate_all_local
+
+#define rt_hw_cpu_icache_invalidate rt_hw_cpu_icache_invalidate_local
+#define rt_hw_cpu_icache_invalidate_all rt_hw_cpu_icache_invalidate_all_local
+
+/** instruction barrier */
+void rt_hw_cpu_sync(void);
+
+/**
+ * @brief local cpu icache & dcache synchronization
+ *
+ * @param addr
+ * @param size
+ */
+void rt_hw_sync_cache_local(void *addr, int size);
+
+#endif /* __CACHE_H__ */

+ 2 - 4
libcpu/risc-v/virt64/mmu.c

@@ -15,6 +15,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <lwp_mm.h>
+#include <cache.h>
 
 #define DBG_TAG "mmu"
 #define DBG_LVL DBG_INFO
@@ -25,9 +26,6 @@
 #include "mmu.h"
 
 void *current_mmu_table = RT_NULL;
-void rt_hw_cpu_icache_invalidate_all();
-void rt_hw_cpu_dcache_flush_all();
-void rt_hw_cpu_dcache_clean(void *addr, rt_size_t size);
 
 volatile rt_ubase_t MMUTable[__SIZE(VPN2_BIT)] __attribute__((aligned(4 * 1024)));
 
@@ -48,7 +46,7 @@ void rt_hw_mmu_switch(void *mmu_table)
     current_mmu_table = mmu_table;
     RT_ASSERT(__CHECKALIGN(mmu_table, PAGE_OFFSET_BIT));
     mmu_set_pagetable((rt_ubase_t)mmu_table);
-    rt_hw_cpu_dcache_flush_all();
+    rt_hw_cpu_dcache_clean_all();
     rt_hw_cpu_icache_invalidate_all();
 }