
[components/mm] support for scalable memory management (#7277)

* [mm/page] multi-list page manager
[mm/page] page debugger
[libcpu/aarch64] hugepage support

* [quality] remove void-arith

* [format] remove kasan codes
Committed by Shell 2 years ago (commit 470454d201)
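
Before the file-by-file diff, a minimal usage sketch of the extended page allocator this commit introduces. It relies only on identifiers visible in the diffs below (rt_pages_alloc_ext, PAGE_ANY_AVAILABLE, rt_pages_free); the include lines and the helper name are illustrative, not part of the commit.

    #include <rtthread.h>
    #include <mm_page.h>    /* declares rt_pages_alloc_ext() and PAGE_ANY_AVAILABLE */

    /* Allocate one page (order 0). PAGE_ANY_AVAILABLE lets the allocator take the
     * page from the high page list when one is configured, falling back to the
     * low (32-bit physical) list automatically. Passing 0 instead of the flag
     * forces the low list, matching the old rt_pages_alloc() behavior. */
    static void *alloc_any_page(void)
    {
        void *page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);
        if (page)
        {
            /* ... use the page ... */
        }
        return page;    /* caller releases it later with rt_pages_free(page, 0) */
    }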

+ 2 - 1
components/drivers/virtio/virtio_net.c

@@ -11,6 +11,7 @@
 #include <rthw.h>
 #include <rtthread.h>
 #include <cpuport.h>
+#include <mm_aspace.h>
 
 #ifdef RT_USING_VIRTIO_NET
 
@@ -106,7 +107,7 @@ static struct pbuf *virtio_net_rx(rt_device_t dev)
 #ifdef RT_USING_SMP
             level = rt_spin_lock_irqsave(&virtio_dev->spinlock);
 #endif
-            rt_memcpy(p->payload, (void *)VIRTIO_PA2VA(queue_rx->desc[id].addr), len);
+            rt_memcpy(p->payload, (void *)queue_rx->desc[id].addr - PV_OFFSET, len);
 
             queue_rx->used_idx++;
 

+ 1 - 1
components/lwp/arch/aarch64/cortex-a/lwp_arch.c

@@ -26,7 +26,7 @@ int arch_user_space_init(struct rt_lwp *lwp)
 {
     size_t *mmu_table;
 
-    mmu_table = (size_t *)rt_pages_alloc(0);
+    mmu_table = (size_t *)rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);
     if (!mmu_table)
     {
         return -RT_ENOMEM;

+ 1 - 1
components/lwp/arch/risc-v/rv64/lwp_arch.c

@@ -91,7 +91,7 @@ int arch_user_space_init(struct rt_lwp *lwp)
 {
     rt_ubase_t *mmu_table;
 
-    mmu_table = (rt_ubase_t *)rt_pages_alloc(0);
+    mmu_table = (rt_ubase_t *)rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);
     if (!mmu_table)
     {
         return -RT_ENOMEM;

+ 1 - 1
components/lwp/arch/x86/i386/lwp_arch.c

@@ -82,7 +82,7 @@ int arch_user_space_init(struct rt_lwp *lwp)
 {
     rt_size_t *mmu_table;
 
-    mmu_table = (rt_size_t *)rt_pages_alloc(0);
+    mmu_table = (rt_size_t *)rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);
     if (!mmu_table)
     {
         return -1;

+ 14 - 1
components/lwp/lwp.c

@@ -9,6 +9,7 @@
  * 2018-11-02     heyuanjie    fix complie error in iar
  * 2021-02-03     lizhirui     add 64-bit arch support and riscv64 arch support
  * 2021-08-26     linzhenxing  add lwp_setcwd\lwp_getcwd
+ * 2023-02-20     wangxiaoyao  inv icache before new app startup
  */
 
 #include <rthw.h>
@@ -1097,10 +1098,22 @@ static void _lwp_thread_entry(void *parameter)
         icache_invalid_all();
     }
 
+    /**
+     * Without ASID support, there is a corner case when an application is
+     * run and exited repeatedly and the same page frame is later bound to a
+     * different text segment. The icache may then hold stale instructions,
+     * which must be invalidated by the running core itself.
+     * With ASID support this is a rare case, occurring only when both the
+     * ASID and the page frame match those of a previously running
+     * application.
+     *
+     * For a newly loaded application the icache holds no matching entries,
+     * so this invalidation has barely any cost.
+     */
     rt_hw_icache_invalidate_all();
 
 #ifdef ARCH_MM_MMU
-    arch_start_umode(lwp->args, lwp->text_entry, (void *)USER_STACK_VEND, tid->stack_addr + tid->stack_size);
+    arch_start_umode(lwp->args, lwp->text_entry, (void *)USER_STACK_VEND, (char *)tid->stack_addr + tid->stack_size);
 #else
     arch_start_umode(lwp->args, lwp->text_entry, lwp->data_entry, (void *)((uint32_t)lwp->data_entry + lwp->data_size));
 #endif /* ARCH_MM_MMU */

+ 3 - 4
components/lwp/lwp_shm.c

@@ -6,6 +6,7 @@
  * Change Logs:
  * Date           Author       Notes
  * 2019-10-12     Jesven       first version
+ * 2023-02-20     wangxiaoyao  adapt to mm
  */
 #include <rthw.h>
 #include <rtthread.h>
@@ -17,8 +18,6 @@
 
 #include <lwp_user_mm.h>
 #include <mmu.h>
-#include <mm_aspace.h>
-#include <mm_flag.h>
 
 /* the kernel structure to represent a share-memory */
 struct lwp_shm_struct
@@ -64,7 +63,7 @@ static void on_shm_page_fault(struct rt_varea *varea, struct rt_aspace_fault_msg
 
     /* map all share page frames to user space in a time */
     void *page = (void *)shm->addr;
-    void *pg_paddr = page + PV_OFFSET;
+    void *pg_paddr = (char *)page + PV_OFFSET;
     err = rt_varea_map_range(varea, varea->start, pg_paddr, shm->size);
 
     if (err == RT_EOK)
@@ -140,7 +139,7 @@ static int _lwp_shmget(size_t key, size_t size, int create)
 
         /* allocate pages up to 2's exponent to cover the required size */
         bit = rt_page_bits(size);
-        page_addr = rt_pages_alloc(bit);           /* virtual address */
+        page_addr = rt_pages_alloc_ext(bit, PAGE_ANY_AVAILABLE);   /* virtual address */
         if (!page_addr)
         {
             goto err;

+ 5 - 5
components/lwp/lwp_syscall.c

@@ -306,7 +306,7 @@ static void _crt_thread_entry(void *parameter)
     user_stack &= ~7; //align 8
 
 #ifdef ARCH_MM_MMU
-    arch_crt_start_umode(parameter, tid->user_entry, (void *)user_stack, tid->stack_addr + tid->stack_size);
+    arch_crt_start_umode(parameter, tid->user_entry, (void *)user_stack, (char *)tid->stack_addr + tid->stack_size);
 #else
     set_user_context((void*)user_stack);
     arch_start_umode(parameter, tid->user_entry, ((struct rt_lwp *)tid->lwp)->data_entry, (void*)user_stack);
@@ -1861,7 +1861,7 @@ static char *_insert_args(int new_argc, char *new_argv[], struct lwp_args_info *
     {
         goto quit;
     }
-    page = rt_pages_alloc(0); /* 1 page */
+    page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); /* 1 page */
     if (!page)
     {
         goto quit;
@@ -2065,7 +2065,7 @@ int load_ldso(struct rt_lwp *lwp, char *exec_name, char *const argv[], char *con
         }
     }
 
-    page = rt_pages_alloc(0); /* 1 page */
+    page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); /* 1 page */
     if (!page)
     {
         SET_ERRNO(ENOMEM);
@@ -2252,7 +2252,7 @@ sysret_t sys_execve(const char *path, char *const argv[], char *const envp[])
         SET_ERRNO(EINVAL);
         goto quit;
     }
-    page = rt_pages_alloc(0); /* 1 page */
+    page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); /* 1 page */
     if (!page)
     {
         SET_ERRNO(ENOMEM);
@@ -2396,7 +2396,7 @@ sysret_t sys_execve(const char *path, char *const argv[], char *const envp[])
         arch_start_umode(lwp->args,
                 lwp->text_entry,
                 (void*)USER_STACK_VEND,
-                thread->stack_addr + thread->stack_size);
+                (char *)thread->stack_addr + thread->stack_size);
         /* never reach here */
     }
     return -EINVAL;

+ 8 - 7
components/lwp/lwp_user_mm.c

@@ -10,6 +10,7 @@
  * 2021-02-12     lizhirui     add 64-bit support for lwp_brk
  * 2021-02-19     lizhirui     add riscv64 support for lwp_user_accessable and lwp_get_from_user
  * 2021-06-07     lizhirui     modify user space bound check
+ * 2022-12-25     wangxiaoyao  adapt to new mm
  */
 
 #include <rtthread.h>
@@ -122,7 +123,7 @@ static void _user_do_page_fault(struct rt_varea *varea,
 
     if (lwp_objs->source)
     {
-        void *paddr = rt_hw_mmu_v2p(lwp_objs->source, msg->fault_vaddr);
+        char *paddr = rt_hw_mmu_v2p(lwp_objs->source, msg->fault_vaddr);
         if (paddr != ARCH_MAP_FAILED)
         {
             void *vaddr;
@@ -130,7 +131,7 @@ static void _user_do_page_fault(struct rt_varea *varea,
 
             if (!(varea->flag & MMF_TEXT))
             {
-                void *cp = rt_pages_alloc(0);
+                void *cp = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);
                 if (cp)
                 {
                     memcpy(cp, vaddr, ARCH_PAGE_SIZE);
@@ -220,9 +221,9 @@ int lwp_unmap_user(struct rt_lwp *lwp, void *va)
 static void _dup_varea(rt_varea_t varea, struct rt_lwp *src_lwp,
                        rt_aspace_t dst)
 {
-    void *vaddr = varea->start;
-    void *vend = vaddr + varea->size;
-    if (vaddr < (void *)USER_STACK_VSTART || vaddr >= (void *)USER_STACK_VEND)
+    char *vaddr = varea->start;
+    char *vend = vaddr + varea->size;
+    if (vaddr < (char *)USER_STACK_VSTART || vaddr >= (char *)USER_STACK_VEND)
     {
         while (vaddr != vend)
         {
@@ -430,7 +431,7 @@ void *lwp_map_user_phy(struct rt_lwp *lwp, void *map_va, void *map_pa,
                        size_t map_size, int cached)
 {
     int err;
-    void *va;
+    char *va;
     size_t offset = 0;
 
     if (!map_size)
@@ -458,7 +459,7 @@ void *lwp_map_user_phy(struct rt_lwp *lwp, void *map_va, void *map_pa,
     rt_size_t attr = cached ? MMU_MAP_U_RWCB : MMU_MAP_U_RW;
 
     err =
-        rt_aspace_map_phy(lwp->aspace, &hint, attr, MM_PA_TO_OFF(map_pa), &va);
+        rt_aspace_map_phy(lwp->aspace, &hint, attr, MM_PA_TO_OFF(map_pa), (void **)&va);
     if (err != RT_EOK)
     {
         va = RT_NULL;

+ 2 - 2
components/mm/avl_adpt.c

@@ -78,7 +78,7 @@ static struct rt_varea *search(struct util_avl_root *root,
     {
         rt_varea_t varea = VAREA_ENTRY(node);
         int cmp = compare(range.start, range.end, varea->start,
-                          varea->start + varea->size - 1);
+                          (char *)varea->start + varea->size - 1);
 
         if (cmp < 0)
         {
@@ -118,7 +118,7 @@ rt_varea_t _aspace_bst_search_exceed(struct rt_aspace *aspace, void *start)
         if (cmp < 0)
         {
             /* varae exceed start */
-            ptrdiff_t off = va_s - start;
+            ptrdiff_t off = (char *)va_s - (char *)start;
             if (off < min_off)
             {
                 min_off = off;

+ 3 - 3
components/mm/ioremap.c

@@ -33,12 +33,12 @@ enum ioremap_type
 
 static void *_ioremap_type(void *paddr, size_t size, enum ioremap_type type)
 {
-    void *v_addr = NULL;
+    char *v_addr = NULL;
     size_t attr;
     size_t lo_off;
     int err;
 
-    lo_off = (uintptr_t)paddr & ARCH_PAGE_MASK;
+    lo_off = (rt_ubase_t)paddr & ARCH_PAGE_MASK;
 
     struct rt_mm_va_hint hint = {
         .prefer = RT_NULL,
@@ -62,7 +62,7 @@ static void *_ioremap_type(void *paddr, size_t size, enum ioremap_type type)
     default:
         return v_addr;
     }
-    err = rt_aspace_map_phy(&rt_kernel_space, &hint, attr, MM_PA_TO_OFF(paddr), &v_addr);
+    err = rt_aspace_map_phy(&rt_kernel_space, &hint, attr, MM_PA_TO_OFF(paddr), (void **)&v_addr);
 
     if (err)
     {

+ 38 - 43
components/mm/mm_aspace.c

@@ -134,24 +134,16 @@ static int _do_named_map(rt_aspace_t aspace, void *vaddr, rt_size_t length,
     int err = RT_EOK;
 
     /* it's ensured by caller that (void*)end will not overflow */
-    void *end = vaddr + length;
     void *phyaddr = (void *)(offset << MM_PAGE_SHIFT);
-    while (vaddr != end)
+
+    void *ret = rt_hw_mmu_map(aspace, vaddr, phyaddr, length, attr);
+    if (ret == RT_NULL)
     {
-        /* TODO try to map with huge TLB, when flag & HUGEPAGE */
-        rt_size_t pgsz = ARCH_PAGE_SIZE;
-        void *ret = rt_hw_mmu_map(aspace, vaddr, phyaddr, pgsz, attr);
-        if (ret == RT_NULL)
-        {
-            err = -RT_ERROR;
-            break;
-        }
-        vaddr += pgsz;
-        phyaddr += pgsz;
+        err = -RT_ERROR;
     }
 
     if (err == RT_EOK)
-        rt_hw_tlb_invalidate_range(aspace, end - length, length, ARCH_PAGE_SIZE);
+        rt_hw_tlb_invalidate_range(aspace, vaddr, length, ARCH_PAGE_SIZE);
 
     return err;
 }
@@ -164,7 +156,7 @@ rt_inline void _do_page_fault(struct rt_aspace_fault_msg *msg, rt_size_t off,
     msg->fault_vaddr = vaddr;
     msg->fault_op = MM_FAULT_OP_READ;
     msg->fault_type = MM_FAULT_TYPE_PAGE_FAULT;
-    msg->response.status = -1;
+    msg->response.status = MM_FAULT_STATUS_UNRECOVERABLE;
     msg->response.vaddr = 0;
     msg->response.size = 0;
 
@@ -180,9 +172,9 @@ int _varea_map_with_msg(rt_varea_t varea, struct rt_aspace_fault_msg *msg)
          * the page returned by handler is not checked
          * cause no much assumption can make on it
          */
-        void *store = msg->response.vaddr;
+        char *store = msg->response.vaddr;
         rt_size_t store_sz = msg->response.size;
-        if (msg->fault_vaddr + store_sz > varea->start + varea->size)
+        if ((char *)msg->fault_vaddr + store_sz > (char *)varea->start + varea->size)
         {
             LOG_W("%s: too much (0x%lx) of buffer on vaddr %p is provided",
                     __func__, store_sz, msg->fault_vaddr);
@@ -232,9 +224,9 @@ static int _do_prefetch(rt_aspace_t aspace, rt_varea_t varea, void *start,
     int err = RT_EOK;
 
     /* it's ensured by caller that start & size ara page-aligned */
-    void *end = start + size;
-    void *vaddr = start;
-    rt_size_t off = varea->offset + ((start - varea->start) >> ARCH_PAGE_SHIFT);
+    char *end = (char *)start + size;
+    char *vaddr = start;
+    rt_size_t off = varea->offset + ((vaddr - (char *)varea->start) >> ARCH_PAGE_SHIFT);
 
     while (vaddr != end)
     {
@@ -243,8 +235,10 @@ static int _do_prefetch(rt_aspace_t aspace, rt_varea_t varea, void *start,
         _do_page_fault(&msg, off, vaddr, varea->mem_obj, varea);
 
         if (_varea_map_with_msg(varea, &msg))
+        {
+            err = -RT_ENOMEM;
             break;
-
+        }
         /**
          * It's hard to identify the mapping pattern on a customized handler
          * So we terminate the prefetch process on that case
@@ -386,7 +380,7 @@ rt_varea_t _varea_create(void *start, rt_size_t size)
 }
 
 #define _IS_OVERFLOW(start, length) ((length) > (0ul - (uintptr_t)(start)))
-#define _IS_OVERSIZE(start, length, limit_s, limit_sz) (((length) + (rt_size_t)((start) - (limit_start))) > (limit_size))
+#define _IS_OVERSIZE(start, length, limit_s, limit_sz) (((length) + (rt_size_t)((char *)(start) - (char *)(limit_start))) > (limit_size))
 
 static inline int _not_in_range(void *start, rt_size_t length,
                                 void *limit_start, rt_size_t limit_size)
@@ -449,6 +443,10 @@ int rt_aspace_map(rt_aspace_t aspace, void **addr, rt_size_t length,
             {
                 rt_free(varea);
             }
+            else
+            {
+                *addr = varea->start;
+            }
         }
         else
         {
@@ -461,10 +459,7 @@ int rt_aspace_map(rt_aspace_t aspace, void **addr, rt_size_t length,
     {
         *addr = NULL;
     }
-    else
-    {
-        *addr = varea->start;
-    }
+
     return err;
 }
 
@@ -642,7 +637,7 @@ int rt_aspace_unmap(rt_aspace_t aspace, void *addr)
     if (_not_in_range(addr, 1, aspace->start, aspace->size))
     {
         LOG_I("%s: %lx not in range of aspace[%lx:%lx]", __func__, addr,
-              aspace->start, aspace->start + aspace->size);
+              aspace->start, (char *)aspace->start + aspace->size);
         return -RT_EINVAL;
     }
 
@@ -658,7 +653,7 @@ static inline void *_lower(void *a, void *b)
 
 static inline void *_align(void *va, rt_ubase_t align_mask)
 {
-    return (void *)((rt_ubase_t)(va + ~align_mask) & align_mask);
+    return (void *)((rt_ubase_t)((char *)va + ~align_mask) & align_mask);
 }
 
 static void *_ascending_search(rt_varea_t varea, rt_size_t req_size,
@@ -667,17 +662,17 @@ static void *_ascending_search(rt_varea_t varea, rt_size_t req_size,
     void *ret = RT_NULL;
     while (varea && varea->start < limit.end)
     {
-        void *candidate = varea->start + varea->size;
+        char *candidate = (char *)varea->start + varea->size;
         candidate = _align(candidate, align_mask);
 
-        if (candidate > limit.end || limit.end - candidate + 1 < req_size)
+        if (candidate > (char *)limit.end || (char *)limit.end - candidate + 1 < req_size)
             break;
 
         rt_varea_t nx_va = ASPACE_VAREA_NEXT(varea);
         if (nx_va)
         {
             rt_size_t gap_size =
-                _lower(limit.end, nx_va->start - 1) - candidate + 1;
+                (char *)_lower(limit.end, (char *)nx_va->start - 1) - candidate + 1;
             if (gap_size >= req_size)
             {
                 ret = candidate;
@@ -703,15 +698,15 @@ static void *_find_head_and_asc_search(rt_aspace_t aspace, rt_size_t req_size,
     rt_varea_t varea = _aspace_bst_search_exceed(aspace, limit.start);
     if (varea)
     {
-        void *candidate = _align(limit.start, align_mask);
-        rt_size_t gap_size = varea->start - candidate;
+        char *candidate = _align(limit.start, align_mask);
+        rt_size_t gap_size = (char *)varea->start - candidate;
         if (gap_size >= req_size)
         {
             rt_varea_t former = _aspace_bst_search(aspace, limit.start);
             if (former)
             {
-                candidate = _align(former->start + former->size, align_mask);
-                gap_size = varea->start - candidate;
+                candidate = _align((char *)former->start + former->size, align_mask);
+                gap_size = (char *)varea->start - candidate;
 
                 if (gap_size >= req_size)
                     va = candidate;
@@ -730,12 +725,12 @@ static void *_find_head_and_asc_search(rt_aspace_t aspace, rt_size_t req_size,
     }
     else
     {
-        void *candidate;
+        char *candidate;
         rt_size_t gap_size;
 
         candidate = limit.start;
         candidate = _align(candidate, align_mask);
-        gap_size = limit.end - candidate + 1;
+        gap_size = (char *)limit.end - candidate + 1;
 
         if (gap_size >= req_size)
             va = candidate;
@@ -750,7 +745,7 @@ static void *_find_free(rt_aspace_t aspace, void *prefer, rt_size_t req_size,
 {
     rt_varea_t varea = NULL;
     void *va = RT_NULL;
-    struct _mm_range limit = {limit_start, limit_start + limit_size - 1};
+    struct _mm_range limit = {limit_start, (char *)limit_start + limit_size - 1};
 
     rt_ubase_t align_mask = ~0ul;
     if (flags & MMF_REQUEST_ALIGN)
@@ -762,7 +757,7 @@ static void *_find_free(rt_aspace_t aspace, void *prefer, rt_size_t req_size,
     {
         /* if prefer and free, just return the prefer region */
         prefer = _align(prefer, align_mask);
-        struct _mm_range range = {prefer, prefer + req_size - 1};
+        struct _mm_range range = {prefer, (char *)prefer + req_size - 1};
         varea = _aspace_bst_search_overlap(aspace, range);
 
         if (!varea)
@@ -780,7 +775,7 @@ static void *_find_free(rt_aspace_t aspace, void *prefer, rt_size_t req_size,
             if (va == RT_NULL)
             {
                 /* rewind to first range */
-                limit.end = varea->start - 1;
+                limit.end = (char *)varea->start - 1;
                 va = _find_head_and_asc_search(aspace, req_size, align_mask,
                                                limit);
             }
@@ -798,7 +793,7 @@ int rt_aspace_load_page(rt_aspace_t aspace, void *addr, rt_size_t npage)
 {
     int err = RT_EOK;
     rt_varea_t varea;
-    void *end = addr + (npage << ARCH_PAGE_SHIFT);
+    char *end = (char *)addr + (npage << ARCH_PAGE_SHIFT);
 
     WR_LOCK(aspace);
     varea = _aspace_bst_search(aspace, addr);
@@ -809,7 +804,7 @@ int rt_aspace_load_page(rt_aspace_t aspace, void *addr, rt_size_t npage)
         LOG_W("%s: varea not exist", __func__);
         err = -RT_ENOENT;
     }
-    else if (addr >= end || (rt_size_t)addr & ARCH_PAGE_MASK ||
+    else if ((char *)addr >= end || (rt_size_t)addr & ARCH_PAGE_MASK ||
              _not_in_range(addr, npage << ARCH_PAGE_SHIFT, varea->start,
                            varea->size))
     {
@@ -938,12 +933,12 @@ static int _dump(rt_varea_t varea, void *arg)
 {
     if (varea->mem_obj && varea->mem_obj->get_name)
     {
-        rt_kprintf("[%p - %p] %s\n", varea->start, varea->start + varea->size,
+        rt_kprintf("[%p - %p] %s\n", varea->start, (char *)varea->start + varea->size,
                    varea->mem_obj->get_name(varea));
     }
     else
     {
-        rt_kprintf("[%p - %p] phy-map\n", varea->start, varea->start + varea->size);
+        rt_kprintf("[%p - %p] phy-map\n", varea->start, (char *)varea->start + varea->size);
         rt_kprintf("\t\\_ paddr = %p\n",  varea->offset << MM_PAGE_SHIFT);
     }
     return 0;

+ 1 - 1
components/mm/mm_fault.c

@@ -104,7 +104,7 @@ int rt_aspace_fault_try_fix(struct rt_aspace_fault_msg *msg)
         if (varea)
         {
             void *pa = rt_hw_mmu_v2p(aspace, msg->fault_vaddr);
-            msg->off = (msg->fault_vaddr - varea->start) >> ARCH_PAGE_SHIFT;
+            msg->off = ((char *)msg->fault_vaddr - (char *)varea->start) >> ARCH_PAGE_SHIFT;
 
             /* permission checked by fault op */
             switch (msg->fault_op)

+ 3 - 3
components/mm/mm_object.c

@@ -56,21 +56,21 @@ void rt_varea_pgmgr_pop_all(rt_varea_t varea)
 
 void rt_varea_pgmgr_pop(rt_varea_t varea, void *vaddr, rt_size_t size)
 {
-    void *vend = vaddr + size;
+    void *vend = (char *)vaddr + size;
     while (vaddr != vend)
     {
         rt_page_t page = rt_page_addr2page(vaddr);
         page->pre->next = page->next;
         page->next->pre = page->pre;
         rt_pages_free(vaddr, 0);
-        vaddr += ARCH_PAGE_SIZE;
+        vaddr = (char *)vaddr + ARCH_PAGE_SIZE;
     }
 }
 
 static void on_page_fault(struct rt_varea *varea, struct rt_aspace_fault_msg *msg)
 {
     void *page;
-    page = rt_pages_alloc(0);
+    page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);
 
     if (!page)
     {

+ 309 - 107
components/mm/mm_page.c

@@ -8,6 +8,7 @@
  * 2019-11-01     Jesven       The first version
  * 2022-12-13     WangXiaoyao  Hot-pluggable, extensible
  *                             page management algorithm
+ * 2023-02-20     WangXiaoyao  Multi-list page-management
  */
 #include <rtthread.h>
 
@@ -39,7 +40,8 @@ static void *init_mpr_cont_start;
 
 static struct rt_varea mpr_varea;
 
-static struct rt_page *page_list[RT_PAGE_MAX_ORDER];
+static struct rt_page *page_list_low[RT_PAGE_MAX_ORDER];
+static struct rt_page *page_list_high[RT_PAGE_MAX_ORDER];
 
 #define page_start ((rt_page_t)rt_mpr_start)
 
@@ -61,21 +63,18 @@ static void hint_free(rt_mm_va_hint_t hint)
 
 static void on_page_fault(struct rt_varea *varea, struct rt_aspace_fault_msg *msg)
 {
-    void *init_start = (void *)init_mpr_align_start;
-    void *init_end = (void *)init_mpr_align_end;
-    if (msg->fault_vaddr < init_end && msg->fault_vaddr >= init_start)
+    char *init_start = (void *)init_mpr_align_start;
+    char *init_end = (void *)init_mpr_align_end;
+    if ((char *)msg->fault_vaddr < init_end && (char *)msg->fault_vaddr >= init_start)
     {
-        rt_size_t offset = msg->fault_vaddr - init_start;
+        rt_size_t offset = (char *)msg->fault_vaddr - init_start;
         msg->response.status = MM_FAULT_STATUS_OK;
-        msg->response.vaddr = init_mpr_cont_start + offset;
+        msg->response.vaddr = (char *)init_mpr_cont_start + offset;
         msg->response.size = ARCH_PAGE_SIZE;
     }
     else
     {
-        void *raw_page = rt_pages_alloc(0);
-        msg->response.status = MM_FAULT_STATUS_OK;
-        msg->response.vaddr = raw_page;
-        msg->response.size = ARCH_PAGE_SIZE;
+        rt_mm_dummy_mapper.on_page_fault(varea, msg);
     }
 }
 
@@ -85,15 +84,123 @@ static struct rt_mem_obj mm_page_mapper = {
     .hint_free = hint_free,
 };
 
+#ifdef RT_DEBUG_PAGE_LEAK
+static volatile int enable;
+static rt_page_t _trace_head;
+#define TRACE_ALLOC(pg, size)       _trace_alloc(pg, __builtin_return_address(0), size)
+#define TRACE_FREE(pgaddr, size)    _trace_free(pgaddr, __builtin_return_address(0), size)
+
+void rt_page_leak_trace_start()
+{
+    // TODO multicore safety
+    _trace_head = NULL;
+    enable = 1;
+}
+MSH_CMD_EXPORT(rt_page_leak_trace_start, start page leak tracer);
+
+static void _collect()
+{
+    rt_page_t page = _trace_head;
+    if (!page)
+    {
+        rt_kputs("ok!\n");
+    }
+
+    while (page)
+    {
+        rt_page_t next = page->tl_next;
+        void *pg_va = rt_page_page2addr(page);
+        LOG_W("LEAK: %p, allocator: %p, size bits: %lx", pg_va, page->caller, page->trace_size);
+        rt_pages_free(pg_va, page->trace_size);
+        page = next;
+    }
+}
+
+void rt_page_leak_trace_stop()
+{
+    // TODO multicore safety
+    enable = 0;
+    _collect();
+}
+MSH_CMD_EXPORT(rt_page_leak_trace_stop, stop page leak tracer);
+
+static void _trace_alloc(rt_page_t page, void *caller, size_t size_bits)
+{
+    if (enable)
+    {
+        page->caller = caller;
+        page->trace_size = size_bits;
+        page->tl_prev = NULL;
+        page->tl_next = NULL;
+
+        if (_trace_head == NULL)
+        {
+            _trace_head = page;
+        }
+        else
+        {
+            _trace_head->tl_prev = page;
+            page->tl_next = _trace_head;
+            _trace_head = page;
+        }
+    }
+}
+
+void _report(rt_page_t page, size_t size_bits, char *msg)
+{
+    void *pg_va = rt_page_page2addr(page);
+    LOG_W("%s: %p, allocator: %p, size bits: %lx", msg, pg_va, page->caller, page->trace_size);
+    rt_kputs("backtrace\n");
+    rt_hw_backtrace(0, 0);
+}
+
+static void _trace_free(rt_page_t page, void *caller, size_t size_bits)
+{
+    if (enable)
+    {
+        /* free after free */
+        if (page->trace_size == 0xabadcafe)
+        {
+            _report(page, size_bits, "free after free");
+            return ;
+        }
+        else if (page->trace_size != size_bits)
+        {
+            rt_kprintf("free with size bits %lx\n", size_bits);
+            _report(page, size_bits, "incompatible size bits parameter");
+            return ;
+        }
+
+        if (page->ref_cnt == 1)
+        {
+            if (page->tl_prev)
+                page->tl_prev->tl_next = page->tl_next;
+            if (page->tl_next)
+                page->tl_next->tl_prev = page->tl_prev;
+
+            if (page == _trace_head)
+                _trace_head = page->tl_next;
+
+            page->tl_prev = NULL;
+            page->tl_next = NULL;
+            page->trace_size = 0xabadcafe;
+        }
+    }
+}
+#else
+#define TRACE_ALLOC(x, y)
+#define TRACE_FREE(x, y)
+#endif
+
 static inline void *page_to_addr(rt_page_t page)
 {
-    return (void *)((page - page_start) << ARCH_PAGE_SHIFT) - PV_OFFSET;
+    return (void *)(((page - page_start) << ARCH_PAGE_SHIFT) - PV_OFFSET);
 }
 
 static inline rt_page_t addr_to_page(rt_page_t pg_start, void *addr)
 {
-    addr += PV_OFFSET;
-    return &pg_start[((uintptr_t)addr >> ARCH_PAGE_SHIFT)];
+    addr = (char *)addr + PV_OFFSET;
+    return &pg_start[((rt_ubase_t)addr >> ARCH_PAGE_SHIFT)];
 }
 
 #define FLOOR(val, align) (((rt_size_t)(val) + (align)-1) & ~((align)-1))
@@ -143,7 +250,7 @@ void *rt_page_page2addr(struct rt_page *p)
     return page_to_addr(p);
 }
 
-static inline struct rt_page *buddy_get(struct rt_page *p,
+static inline struct rt_page *_buddy_get(struct rt_page *p,
                                         rt_uint32_t size_bits)
 {
     rt_size_t addr;
@@ -153,7 +260,7 @@ static inline struct rt_page *buddy_get(struct rt_page *p,
     return rt_page_addr2page((void *)addr);
 }
 
-static void page_remove(struct rt_page *p, rt_uint32_t size_bits)
+static void _page_remove(rt_page_t page_list[], struct rt_page *p, rt_uint32_t size_bits)
 {
     if (p->pre)
     {
@@ -172,7 +279,7 @@ static void page_remove(struct rt_page *p, rt_uint32_t size_bits)
     p->size_bits = ARCH_ADDRESS_WIDTH_BITS;
 }
 
-static void page_insert(struct rt_page *p, rt_uint32_t size_bits)
+static void _page_insert(rt_page_t page_list[], struct rt_page *p, rt_uint32_t size_bits)
 {
     p->next = page_list[size_bits];
     if (p->next)
@@ -194,7 +301,7 @@ static void _pages_ref_inc(struct rt_page *p, rt_uint32_t size_bits)
     idx = idx & ~((1UL << size_bits) - 1);
 
     page_head = page_start + idx;
-    page_head = (void *)page_head + early_offset;
+    page_head = (void *)((char *)page_head + early_offset);
     page_head->ref_cnt++;
 }
 
@@ -211,13 +318,13 @@ static int _pages_ref_get(struct rt_page *p, rt_uint32_t size_bits)
     return page_head->ref_cnt;
 }
 
-static int _pages_free(struct rt_page *p, rt_uint32_t size_bits)
+static int _pages_free(rt_page_t page_list[], struct rt_page *p, rt_uint32_t size_bits)
 {
     rt_uint32_t level = size_bits;
     struct rt_page *buddy;
 
     RT_ASSERT(p >= page_start);
-    RT_ASSERT((void *)p < rt_mpr_start + rt_mpr_size);
+    RT_ASSERT((char *)p < (char *)rt_mpr_start + rt_mpr_size);
     RT_ASSERT(rt_kmem_v2p(p));
     RT_ASSERT(p->ref_cnt > 0);
     RT_ASSERT(p->size_bits == ARCH_ADDRESS_WIDTH_BITS);
@@ -231,10 +338,10 @@ static int _pages_free(struct rt_page *p, rt_uint32_t size_bits)
 
     while (level < RT_PAGE_MAX_ORDER - 1)
     {
-        buddy = buddy_get(p, level);
+        buddy = _buddy_get(p, level);
         if (buddy && buddy->size_bits == level)
         {
-            page_remove(buddy, level);
+            _page_remove(page_list, buddy, level);
             p = (p < buddy) ? p : buddy;
             level++;
         }
@@ -243,18 +350,18 @@ static int _pages_free(struct rt_page *p, rt_uint32_t size_bits)
             break;
         }
     }
-    page_insert(p, level);
+    _page_insert(page_list, p, level);
     return 1;
 }
 
-static struct rt_page *_pages_alloc(rt_uint32_t size_bits)
+static struct rt_page *_pages_alloc(rt_page_t page_list[], rt_uint32_t size_bits)
 {
     struct rt_page *p;
 
     if (page_list[size_bits])
     {
         p = page_list[size_bits];
-        page_remove(p, size_bits);
+        _page_remove(page_list, p, size_bits);
     }
     else
     {
@@ -273,11 +380,11 @@ static struct rt_page *_pages_alloc(rt_uint32_t size_bits)
         }
 
         p = page_list[level];
-        page_remove(p, level);
+        _page_remove(page_list, p, level);
         while (level > size_bits)
         {
-            page_insert(p, level - 1);
-            p = buddy_get(p, level - 1);
+            _page_insert(page_list, p, level - 1);
+            p = _buddy_get(p, level - 1);
             level--;
         }
     }
@@ -286,12 +393,12 @@ static struct rt_page *_pages_alloc(rt_uint32_t size_bits)
     return p;
 }
 
-static void _early_page_remove(rt_page_t page, rt_uint32_t size_bits)
+static void _early_page_remove(rt_page_t page_list[], rt_page_t page, rt_uint32_t size_bits)
 {
-    rt_page_t page_cont = (void *)page + early_offset;
+    rt_page_t page_cont = (rt_page_t)((char *)page + early_offset);
     if (page_cont->pre)
     {
-        rt_page_t pre_cont = (void *)page_cont->pre + early_offset;
+        rt_page_t pre_cont = (rt_page_t)((char *)page_cont->pre + early_offset);
         pre_cont->next = page_cont->next;
     }
     else
@@ -301,23 +408,23 @@ static void _early_page_remove(rt_page_t page, rt_uint32_t size_bits)
 
     if (page_cont->next)
     {
-        rt_page_t next_cont = (void *)page_cont->next + early_offset;
+        rt_page_t next_cont = (rt_page_t)((char *)page_cont->next + early_offset);
         next_cont->pre = page_cont->pre;
     }
 
     page_cont->size_bits = ARCH_ADDRESS_WIDTH_BITS;
 }
 
-static void _early_page_insert(rt_page_t page, int size_bits)
+static void _early_page_insert(rt_page_t page_list[], rt_page_t page, int size_bits)
 {
     RT_ASSERT((void *)page >= rt_mpr_start &&
-              (void *)page - rt_mpr_start < +rt_mpr_size);
-    rt_page_t page_cont = (void *)page + early_offset;
+              ((char *)page - (char *)rt_mpr_start) < rt_mpr_size);
+    rt_page_t page_cont = (rt_page_t)((char *)page + early_offset);
 
     page_cont->next = page_list[size_bits];
     if (page_cont->next)
     {
-        rt_page_t next_cont = (void *)page_cont->next + early_offset;
+        rt_page_t next_cont = (rt_page_t)((char *)page_cont->next + early_offset);
         next_cont->pre = page;
     }
     page_cont->pre = 0;
@@ -325,14 +432,14 @@ static void _early_page_insert(rt_page_t page, int size_bits)
     page_cont->size_bits = size_bits;
 }
 
-static struct rt_page *_early_pages_alloc(rt_uint32_t size_bits)
+static struct rt_page *_early_pages_alloc(rt_page_t page_list[], rt_uint32_t size_bits)
 {
     struct rt_page *p;
 
     if (page_list[size_bits])
     {
         p = page_list[size_bits];
-        _early_page_remove(p, size_bits);
+        _early_page_remove(page_list, p, size_bits);
     }
     else
     {
@@ -351,20 +458,35 @@ static struct rt_page *_early_pages_alloc(rt_uint32_t size_bits)
         }
 
         p = page_list[level];
-        _early_page_remove(p, level);
+        _early_page_remove(page_list, p, level);
         while (level > size_bits)
         {
-            _early_page_insert(p, level - 1);
-            p = buddy_get(p, level - 1);
+            _early_page_insert(page_list, p, level - 1);
+            p = _buddy_get(p, level - 1);
             level--;
         }
     }
-    rt_page_t page_cont = (void *)p + early_offset;
+    rt_page_t page_cont = (rt_page_t)((char *)p + early_offset);
     page_cont->size_bits = ARCH_ADDRESS_WIDTH_BITS;
     page_cont->ref_cnt = 1;
     return p;
 }
 
+static rt_page_t *_get_page_list(void *vaddr)
+{
+    rt_ubase_t pa_int = (rt_ubase_t)vaddr + PV_OFFSET;
+    rt_page_t *list;
+    if (pa_int > UINT32_MAX)
+    {
+        list = page_list_high;
+    }
+    else
+    {
+        list = page_list_low;
+    }
+    return list;
+}
+
 int rt_page_ref_get(void *addr, rt_uint32_t size_bits)
 {
     struct rt_page *p;
@@ -389,27 +511,73 @@ void rt_page_ref_inc(void *addr, rt_uint32_t size_bits)
     rt_hw_interrupt_enable(level);
 }
 
-static rt_page_t (*pages_alloc_handler)(rt_uint32_t size_bits);
+static rt_page_t (*pages_alloc_handler)(rt_page_t page_list[], rt_uint32_t size_bits);
 
-void *rt_pages_alloc(rt_uint32_t size_bits)
+/* if no high pages are configured, skip searching page_list_high */
+static size_t _high_page_configured = 0;
+
+static rt_page_t *_flag_to_page_list(size_t flags)
+{
+    rt_page_t *page_list;
+    if (_high_page_configured && (flags & PAGE_ANY_AVAILABLE))
+    {
+        page_list = page_list_high;
+    }
+    else
+    {
+        page_list = page_list_low;
+    }
+    return page_list;
+}
+
+static void *_do_pages_alloc(rt_uint32_t size_bits, size_t flags)
 {
     void *alloc_buf = RT_NULL;
     struct rt_page *p;
     rt_base_t level;
+    rt_page_t *page_list = _flag_to_page_list(flags);
 
     level = rt_hw_interrupt_disable();
-    p = pages_alloc_handler(size_bits);
+    p = pages_alloc_handler(page_list, size_bits);
     rt_hw_interrupt_enable(level);
+
+    if (!p && page_list != page_list_low)
+    {
+        /* fall back */
+        page_list = page_list_low;
+
+        level = rt_hw_interrupt_disable();
+        p = pages_alloc_handler(page_list, size_bits);
+        rt_hw_interrupt_enable(level);
+    }
+
     if (p)
     {
         alloc_buf = page_to_addr(p);
+
+        #ifdef RT_DEBUG_PAGE_LEAK
+            level = rt_hw_interrupt_disable();
+            TRACE_ALLOC(p, size_bits);
+            rt_hw_interrupt_enable(level);
+        #endif
     }
     return alloc_buf;
 }
 
+void *rt_pages_alloc(rt_uint32_t size_bits)
+{
+    return _do_pages_alloc(size_bits, 0);
+}
+
+void *rt_pages_alloc_ext(rt_uint32_t size_bits, size_t flags)
+{
+    return _do_pages_alloc(size_bits, flags);
+}
+
 int rt_pages_free(void *addr, rt_uint32_t size_bits)
 {
     struct rt_page *p;
+    rt_page_t *page_list = _get_page_list(addr);
     int real_free = 0;
 
     p = rt_page_addr2page(addr);
@@ -417,14 +585,18 @@ int rt_pages_free(void *addr, rt_uint32_t size_bits)
     {
         rt_base_t level;
         level = rt_hw_interrupt_disable();
-        real_free = _pages_free(p, size_bits);
+        real_free = _pages_free(page_list, p, size_bits);
+        if (real_free)
+            TRACE_FREE(p, size_bits);
         rt_hw_interrupt_enable(level);
     }
+
     return real_free;
 }
 
 void rt_page_list(void) __attribute__((alias("list_page")));
 
+#warning TODO: improve list page
 void list_page(void)
 {
     int i;
@@ -435,7 +607,7 @@ void list_page(void)
 
     for (i = 0; i < RT_PAGE_MAX_ORDER; i++)
     {
-        struct rt_page *p = page_list[i];
+        struct rt_page *p = page_list_low[i];
 
         rt_kprintf("level %d ", i);
 
@@ -447,6 +619,21 @@ void list_page(void)
         }
         rt_kprintf("\n");
     }
+    for (i = 0; i < RT_PAGE_MAX_ORDER; i++)
+    {
+        struct rt_page *p = page_list_high[i];
+
+        rt_kprintf("level %d ", i);
+
+        while (p)
+        {
+            total += (1UL << i);
+            rt_kprintf("[0x%08p]", rt_page_page2addr(p));
+            p = p->next;
+        }
+        rt_kprintf("\n");
+    }
+
     rt_hw_interrupt_enable(level);
     rt_kprintf("free pages is 0x%08lx (%ld KB)\n", total, total * ARCH_PAGE_SIZE / 1024);
     rt_kprintf("-------------------------------\n");
@@ -462,7 +649,17 @@ void rt_page_get_info(rt_size_t *total_nr, rt_size_t *free_nr)
     level = rt_hw_interrupt_disable();
     for (i = 0; i < RT_PAGE_MAX_ORDER; i++)
     {
-        struct rt_page *p = page_list[i];
+        struct rt_page *p = page_list_low[i];
+
+        while (p)
+        {
+            total_free += (1UL << i);
+            p = p->next;
+        }
+    }
+    for (i = 0; i < RT_PAGE_MAX_ORDER; i++)
+    {
+        struct rt_page *p = page_list_high[i];
 
         while (p)
         {
@@ -475,6 +672,62 @@ void rt_page_get_info(rt_size_t *total_nr, rt_size_t *free_nr)
     *free_nr = total_free;
 }
 
+static void _install_page(rt_page_t mpr_head, rt_region_t region, void *insert_handler)
+{
+    void (*insert)(rt_page_t *page_list, rt_page_t page, int size_bits) = insert_handler;
+    rt_region_t shadow;
+    shadow.start = region.start & ~shadow_mask;
+    shadow.end = FLOOR(region.end, shadow_mask + 1);
+
+    if (shadow.end > UINT32_MAX)
+        _high_page_configured = 1;
+
+    rt_page_t shad_head = addr_to_page(mpr_head, (void *)shadow.start);
+    rt_page_t shad_tail = addr_to_page(mpr_head, (void *)shadow.end);
+    rt_page_t head = addr_to_page(mpr_head, (void *)region.start);
+    rt_page_t tail = addr_to_page(mpr_head, (void *)region.end);
+
+    /* mark shadow pages as illegal */
+    for (rt_page_t iter = shad_head; iter < head; iter++)
+    {
+        iter->size_bits = ARCH_ADDRESS_WIDTH_BITS;
+    }
+    for (rt_page_t iter = tail; iter < shad_tail; iter++)
+    {
+        iter->size_bits = ARCH_ADDRESS_WIDTH_BITS;
+    }
+
+    /* insert reserved pages to list */
+    const int max_order = RT_PAGE_MAX_ORDER + ARCH_PAGE_SHIFT - 1;
+    while (region.start != region.end)
+    {
+        struct rt_page *p;
+        int align_bits;
+        int size_bits;
+
+        size_bits =
+            ARCH_ADDRESS_WIDTH_BITS - 1 - rt_hw_clz(region.end - region.start);
+        align_bits = rt_hw_ctz(region.start);
+        if (align_bits < size_bits)
+        {
+            size_bits = align_bits;
+        }
+        if (size_bits > max_order)
+        {
+            size_bits = max_order;
+        }
+
+        p = addr_to_page(mpr_head, (void *)region.start);
+        p->size_bits = ARCH_ADDRESS_WIDTH_BITS;
+        p->ref_cnt = 0;
+
+        /* insert to list */
+        rt_page_t *page_list = _get_page_list((void *)region.start);
+        insert(page_list, (rt_page_t)((char *)p - early_offset), size_bits - ARCH_PAGE_SHIFT);
+        region.start += (1UL << size_bits);
+    }
+}
+
 void rt_page_init(rt_region_t reg)
 {
     int i;
@@ -500,7 +753,8 @@ void rt_page_init(rt_region_t reg)
     /* init free list */
     for (i = 0; i < RT_PAGE_MAX_ORDER; i++)
     {
-        page_list[i] = 0;
+        page_list_low[i] = 0;
+        page_list_high[i] = 0;
     }
 
     /* map MPR area */
@@ -524,9 +778,9 @@ void rt_page_init(rt_region_t reg)
     rt_size_t init_mpr_npage = init_mpr_size >> ARCH_PAGE_SHIFT;
 
     init_mpr_cont_start = (void *)reg.start;
-    void *init_mpr_cont_end = init_mpr_cont_start + init_mpr_size;
-    early_offset = init_mpr_cont_start - (void *)init_mpr_align_start;
-    rt_page_t mpr_cont = rt_mpr_start + early_offset;
+    rt_size_t init_mpr_cont_end = (rt_size_t)init_mpr_cont_start + init_mpr_size;
+    early_offset = (rt_size_t)init_mpr_cont_start - init_mpr_align_start;
+    rt_page_t mpr_cont = (void *)((char *)rt_mpr_start + early_offset);
 
     /* mark init mpr pages as illegal */
     rt_page_t head_cont = addr_to_page(mpr_cont, (void *)reg.start);
@@ -536,48 +790,8 @@ void rt_page_init(rt_region_t reg)
         iter->size_bits = ARCH_ADDRESS_WIDTH_BITS;
     }
 
-    /* mark shadow pages as illegal */
-    rt_page_t shad_head_cont = addr_to_page(mpr_cont, (void *)shadow.start);
-    for (rt_page_t iter = shad_head_cont; iter < head_cont; iter++)
-    {
-        iter->size_bits = ARCH_ADDRESS_WIDTH_BITS;
-    }
-    rt_page_t shad_tail_cont = addr_to_page(mpr_cont, (void *)shadow.end);
-    for (rt_page_t iter = tail_cont; iter < shad_tail_cont; iter++)
-    {
-        iter->size_bits = ARCH_ADDRESS_WIDTH_BITS;
-    }
-
-    /* insert reserved pages to list */
-    reg.start = (rt_size_t)init_mpr_cont_end;
-    const int max_order = RT_PAGE_MAX_ORDER + ARCH_PAGE_SHIFT - 1;
-    while (reg.start != reg.end)
-    {
-        struct rt_page *p;
-        int align_bits;
-        int size_bits;
-
-        size_bits =
-            ARCH_ADDRESS_WIDTH_BITS - 1 - rt_hw_clz(reg.end - reg.start);
-        align_bits = rt_hw_ctz(reg.start);
-        if (align_bits < size_bits)
-        {
-            size_bits = align_bits;
-        }
-        if (size_bits > max_order)
-        {
-            size_bits = max_order;
-        }
-
-        p = addr_to_page(mpr_cont, (void *)reg.start);
-        p->size_bits = ARCH_ADDRESS_WIDTH_BITS;
-        p->ref_cnt = 0;
-
-        /* insert to list */
-        _early_page_insert((void *)p - early_offset,
-                           size_bits - ARCH_PAGE_SHIFT);
-        reg.start += (1UL << size_bits);
-    }
+    reg.start = init_mpr_cont_end;
+    _install_page(mpr_cont, reg, _early_page_insert);
 
     pages_alloc_handler = _early_pages_alloc;
     /* doing the page table bushiness */
@@ -594,7 +808,7 @@ void rt_page_init(rt_region_t reg)
 static int _load_mpr_area(void *head, void *tail)
 {
     int err = 0;
-    void *iter = (void *)((uintptr_t)head & ~ARCH_PAGE_MASK);
+    char *iter = (char *)((rt_ubase_t)head & ~ARCH_PAGE_MASK);
     tail = (void *)FLOOR(tail, ARCH_PAGE_SIZE);
 
     while (iter != tail)
@@ -630,19 +844,7 @@ int rt_page_install(rt_region_t region)
 
         if (err == RT_EOK)
         {
-            while (region.start != region.end)
-            {
-                struct rt_page *p;
-                int size_bits;
-
-                size_bits = RT_PAGE_MAX_ORDER - 1;
-                p = addr_to_page(page_start, (void *)region.start);
-                p->size_bits = ARCH_ADDRESS_WIDTH_BITS;
-                p->ref_cnt = 1;
-
-                _pages_free(p, size_bits);
-                region.start += (1UL << (size_bits + ARCH_PAGE_SHIFT));
-            }
+            _install_page(rt_mpr_start, region, _page_insert);
         }
     }
     return err;

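A usage note on the page debugger added above, inferred from the MSH_CMD_EXPORT lines here and the new RT_DEBUG_PAGE_LEAK option in src/Kconfig further down: with RT_DEBUG_PAGE_LEAK enabled, run rt_page_leak_trace_start from the msh shell, exercise the workload under test, then run rt_page_leak_trace_stop. On stop, _collect() reports every page still linked on the trace list as a LEAK together with its allocator return address and size bits, and then frees it.
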
+ 31 - 0
components/mm/mm_page.h

@@ -23,10 +23,35 @@
         union {struct {fields}; char _padding[GET_FLOOR(struct {fields})];};\
     } *rt_page_t
 
+/**
+ * @brief PAGE ALLOC FLAGS
+ *
+ * @info PAGE_ANY_AVAILABLE
+ * page allocation defaults to the lower region; this behavior can be changed
+ * by setting PAGE_ANY_AVAILABLE
+ */
+
+#define PAGE_ANY_AVAILABLE 0x1ul
+
+
+#ifdef RT_DEBUG_PAGE_LEAK
+#define DEBUG_FIELD {           \
+    /* trace list */            \
+    struct rt_page *tl_next;    \
+    struct rt_page *tl_prev;    \
+    void *caller;               \
+    size_t trace_size;          \
+}
+#else
+#define DEBUG_FIELD
+#endif
+
 DEF_PAGE_T(
     struct rt_page *next;   /* same level next */
     struct rt_page *pre;    /* same level pre  */
 
+    DEBUG_FIELD
+
     rt_uint32_t size_bits;     /* if is ARCH_ADDRESS_WIDTH_BITS, means not free */
     rt_uint32_t ref_cnt;       /* page group ref count */
 );
@@ -49,6 +74,8 @@ void rt_page_cleanup(void);
 
 void *rt_pages_alloc(rt_uint32_t size_bits);
 
+void *rt_pages_alloc_ext(rt_uint32_t size_bits, size_t flags);
+
 void rt_page_ref_inc(void *addr, rt_uint32_t size_bits);
 
 int rt_page_ref_get(void *addr, rt_uint32_t size_bits);
@@ -78,4 +105,8 @@ struct rt_page *rt_page_addr2page(void *addr);
  */
 int rt_page_install(rt_region_t region);
 
+void rt_page_leak_trace_start(void);
+
+void rt_page_leak_trace_stop(void);
+
 #endif /* __MM_PAGE_H__ */

+ 4 - 1
libcpu/aarch64/common/cache.h

@@ -23,7 +23,10 @@ void rt_hw_cpu_dcache_invalidate(void *start_addr, unsigned long size);
 
 static inline void rt_hw_icache_invalidate_all(void)
 {
-    __asm_invalidate_icache_all();
+    /* wait for any modification complete */
+    __asm__ volatile ("dsb ishst");
+    __asm__ volatile ("ic iallu");
+    __asm__ volatile ("isb");
 }
 
 void rt_hw_cpu_icache_invalidate(void *addr, rt_size_t size);

+ 4 - 1
libcpu/aarch64/common/cpu.c

@@ -132,7 +132,10 @@ static rt_uint64_t _read_be_number(void *start, int size)
 {
     rt_uint64_t buf = 0;
     for (; size > 0; size--)
-        buf = (buf << 32) | fdt32_to_cpu(*(uint32_t *)start++);
+    {
+        buf = (buf << 32) | fdt32_to_cpu(*(uint32_t *)start);
+        start = (uint32_t *)start + 1;
+    }
     return buf;
 }
 

+ 134 - 39
libcpu/aarch64/common/mmu.c

@@ -1,13 +1,14 @@
 /*
- * Copyright (c) 2006-2018, RT-Thread Development Team
+ * Copyright (c) 2006-2023, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Change Logs:
  * Date           Author       Notes
  * 2012-01-10     bernard      porting to AM1808
+ * 2021-11-28     GuEe-GUI     first version
+ * 2022-12-10     WangXiaoyao  porting to MM
  */
-
 #include <board.h>
 #include <rthw.h>
 #include <rtthread.h>
@@ -79,6 +80,7 @@ static void _kenrel_unmap_4K(unsigned long *lv0_tbl, void *v_addr)
         {
             break;
         }
+        /* next table entry in current level */
         level_info[level].pos = cur_lv_tbl + off;
         cur_lv_tbl = (unsigned long *)(page & MMU_ADDRESS_MASK);
         cur_lv_tbl = (unsigned long *)((unsigned long)cur_lv_tbl - PV_OFFSET);
@@ -119,8 +121,7 @@ static void _kenrel_unmap_4K(unsigned long *lv0_tbl, void *v_addr)
     return;
 }
 
-static int _kenrel_map_4K(unsigned long *lv0_tbl, void *vaddr, void *paddr,
-                          unsigned long attr)
+static int _kernel_map_4K(unsigned long *lv0_tbl, void *vaddr, void *paddr, unsigned long attr)
 {
     int ret = 0;
     int level;
@@ -145,7 +146,7 @@ static int _kenrel_map_4K(unsigned long *lv0_tbl, void *vaddr, void *paddr,
         off &= MMU_LEVEL_MASK;
         if (!(cur_lv_tbl[off] & MMU_TYPE_USED))
         {
-            page = (unsigned long)rt_pages_alloc(0);
+            page = (unsigned long)rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);
             if (!page)
             {
                 ret = MMU_MAP_ERROR_NOPAGE;
@@ -188,19 +189,104 @@ err:
     return ret;
 }
 
+static int _kernel_map_2M(unsigned long *lv0_tbl, void *vaddr, void *paddr, unsigned long attr)
+{
+    int ret = 0;
+    int level;
+    unsigned long *cur_lv_tbl = lv0_tbl;
+    unsigned long page;
+    unsigned long off;
+    unsigned long va = (unsigned long)vaddr;
+    unsigned long pa = (unsigned long)paddr;
+
+    int level_shift = MMU_ADDRESS_BITS;
+
+    if (va & ARCH_SECTION_MASK)
+    {
+        return MMU_MAP_ERROR_VANOTALIGN;
+    }
+    if (pa & ARCH_SECTION_MASK)
+    {
+        return MMU_MAP_ERROR_PANOTALIGN;
+    }
+    for (level = 0; level < MMU_TBL_BLOCK_2M_LEVEL; level++)
+    {
+        off = (va >> level_shift);
+        off &= MMU_LEVEL_MASK;
+        if (!(cur_lv_tbl[off] & MMU_TYPE_USED))
+        {
+            page = (unsigned long)rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);
+            if (!page)
+            {
+                ret = MMU_MAP_ERROR_NOPAGE;
+                goto err;
+            }
+            rt_memset((char *)page, 0, ARCH_PAGE_SIZE);
+            rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, (void *)page, ARCH_PAGE_SIZE);
+            cur_lv_tbl[off] = (page + PV_OFFSET) | MMU_TYPE_TABLE;
+            rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, cur_lv_tbl + off, sizeof(void *));
+        }
+        else
+        {
+            page = cur_lv_tbl[off];
+            page &= MMU_ADDRESS_MASK;
+            /* page to va */
+            page -= PV_OFFSET;
+            rt_page_ref_inc((void *)page, 0);
+        }
+        page = cur_lv_tbl[off];
+        if ((page & MMU_TYPE_MASK) == MMU_TYPE_BLOCK)
+        {
+            /* is block! error! */
+            ret = MMU_MAP_ERROR_CONFLICT;
+            goto err;
+        }
+        cur_lv_tbl = (unsigned long *)(page & MMU_ADDRESS_MASK);
+        cur_lv_tbl = (unsigned long *)((unsigned long)cur_lv_tbl - PV_OFFSET);
+        level_shift -= MMU_LEVEL_SHIFT;
+    }
+    /* now is level page */
+    attr &= MMU_ATTRIB_MASK;
+    pa |= (attr | MMU_TYPE_BLOCK); /* block */
+    off = (va >> ARCH_SECTION_SHIFT);
+    off &= MMU_LEVEL_MASK;
+    cur_lv_tbl[off] = pa;
+    rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, cur_lv_tbl + off, sizeof(void *));
+    return ret;
+err:
+    _kenrel_unmap_4K(lv0_tbl, (void *)va);
+    return ret;
+}
+
 void *rt_hw_mmu_map(rt_aspace_t aspace, void *v_addr, void *p_addr, size_t size,
                     size_t attr)
 {
     int ret = -1;
 
     void *unmap_va = v_addr;
-    size_t npages = size >> ARCH_PAGE_SHIFT;
+    size_t npages;
+    size_t stride;
+    int (*mapper)(unsigned long *lv0_tbl, void *vaddr, void *paddr, unsigned long attr);
+
+    if (((rt_ubase_t)v_addr & ARCH_SECTION_MASK) || (size & ARCH_SECTION_MASK))
+    {
+        /* legacy 4k mapping */
+        npages = size >> ARCH_PAGE_SHIFT;
+        stride = ARCH_PAGE_SIZE;
+        mapper = _kernel_map_4K;
+    }
+    else
+    {
+        /* 2m huge page */
+        npages = size >> ARCH_SECTION_SHIFT;
+        stride = ARCH_SECTION_SIZE;
+        mapper = _kernel_map_2M;
+    }
 
-    // TODO trying with HUGEPAGE here
     while (npages--)
     {
         MM_PGTBL_LOCK(aspace);
-        ret = _kenrel_map_4K(aspace->page_table, v_addr, p_addr, attr);
+        ret = mapper(aspace->page_table, v_addr, p_addr, attr);
         MM_PGTBL_UNLOCK(aspace);
 
         if (ret != 0)
@@ -213,12 +299,12 @@ void *rt_hw_mmu_map(rt_aspace_t aspace, void *v_addr, void *p_addr, size_t size,
                 MM_PGTBL_LOCK(aspace);
                 _kenrel_unmap_4K(aspace->page_table, (void *)unmap_va);
                 MM_PGTBL_UNLOCK(aspace);
-                unmap_va += ARCH_PAGE_SIZE;
+                unmap_va = (char *)unmap_va + stride;
             }
             break;
         }
-        v_addr += ARCH_PAGE_SIZE;
-        p_addr += ARCH_PAGE_SIZE;
+        v_addr = (char *)v_addr + stride;
+        p_addr = (char *)p_addr + stride;
     }
 
     if (ret == 0)
@@ -244,7 +330,7 @@ void rt_hw_mmu_unmap(rt_aspace_t aspace, void *v_addr, size_t size)
         MM_PGTBL_LOCK(aspace);
         _kenrel_unmap_4K(aspace->page_table, v_addr);
         MM_PGTBL_UNLOCK(aspace);
-        v_addr += ARCH_PAGE_SIZE;
+        v_addr = (char *)v_addr + ARCH_PAGE_SIZE;
     }
 }
 
@@ -254,7 +340,7 @@ void rt_hw_aspace_switch(rt_aspace_t aspace)
     {
         void *pgtbl = aspace->page_table;
         pgtbl = rt_kmem_v2p(pgtbl);
-        uintptr_t tcr;
+        rt_ubase_t tcr;
 
         __asm__ volatile("msr ttbr0_el1, %0" ::"r"(pgtbl) : "memory");
 
@@ -337,20 +423,19 @@ void rt_hw_mmu_setup(rt_aspace_t aspace, struct mem_desc *mdesc, int desc_nr)
     rt_page_cleanup();
 }
 
-
 #ifdef RT_USING_SMART
-static inline void _init_region(void *vaddr, size_t size)
+static void _init_region(void *vaddr, size_t size)
 {
     rt_ioremap_start = vaddr;
     rt_ioremap_size = size;
-    rt_mpr_start = rt_ioremap_start - rt_mpr_size;
+    rt_mpr_start = (char *)rt_ioremap_start - rt_mpr_size;
 }
 #else
 
-#define RTOS_VEND ((void *)0xfffffffff000UL)
+#define RTOS_VEND (0xfffffffff000UL)
 static inline void _init_region(void *vaddr, size_t size)
 {
-    rt_mpr_start = RTOS_VEND - rt_mpr_size;
+    rt_mpr_start = (void *)(RTOS_VEND - rt_mpr_size);
 }
 #endif
 
@@ -395,7 +480,7 @@ int rt_hw_mmu_map_init(rt_aspace_t aspace, void *v_address, size_t size,
     rt_aspace_init(aspace, (void *)KERNEL_VADDR_START, 0 - KERNEL_VADDR_START,
                    vtable);
 #else
-    rt_aspace_init(aspace, (void *)0x1000, RTOS_VEND - (void *)0x1000, vtable);
+    rt_aspace_init(aspace, (void *)0x1000, RTOS_VEND - 0x1000ul, vtable);
 #endif
 
     _init_region(v_address, size);
@@ -586,26 +671,35 @@ void *rt_hw_mmu_v2p(rt_aspace_t aspace, void *v_addr)
 {
     int level_shift;
     unsigned long paddr;
-    unsigned long *pte = _query(aspace, v_addr, &level_shift);
 
-    if (pte)
+    if (aspace == &rt_kernel_space)
     {
-        paddr = *pte & MMU_ADDRESS_MASK;
-        paddr |= (uintptr_t)v_addr & ((1ul << level_shift) - 1);
+        paddr = (unsigned long)rt_hw_mmu_kernel_v2p(v_addr);
     }
     else
     {
-        paddr = (unsigned long)ARCH_MAP_FAILED;
+        unsigned long *pte = _query(aspace, v_addr, &level_shift);
+
+        if (pte)
+        {
+            paddr = *pte & MMU_ADDRESS_MASK;
+            paddr |= (rt_ubase_t)v_addr & ((1ul << level_shift) - 1);
+        }
+        else
+        {
+            paddr = (unsigned long)ARCH_MAP_FAILED;
+        }
     }
+
     return (void *)paddr;
 }
 
-static int _noncache(uintptr_t *pte)
+static int _noncache(rt_ubase_t *pte)
 {
     int err = 0;
-    const uintptr_t idx_shift = 2;
-    const uintptr_t idx_mask = 0x7 << idx_shift;
-    uintptr_t entry = *pte;
+    const rt_ubase_t idx_shift = 2;
+    const rt_ubase_t idx_mask = 0x7 << idx_shift;
+    rt_ubase_t entry = *pte;
     if ((entry & idx_mask) == (NORMAL_MEM << idx_shift))
     {
         *pte = (entry & ~idx_mask) | (NORMAL_NOCACHE_MEM << idx_shift);
@@ -618,12 +712,12 @@ static int _noncache(uintptr_t *pte)
     return err;
 }
 
-static int _cache(uintptr_t *pte)
+static int _cache(rt_ubase_t *pte)
 {
     int err = 0;
-    const uintptr_t idx_shift = 2;
-    const uintptr_t idx_mask = 0x7 << idx_shift;
-    uintptr_t entry = *pte;
+    const rt_ubase_t idx_shift = 2;
+    const rt_ubase_t idx_mask = 0x7 << idx_shift;
+    rt_ubase_t entry = *pte;
     if ((entry & idx_mask) == (NORMAL_NOCACHE_MEM << idx_shift))
     {
         *pte = (entry & ~idx_mask) | (NORMAL_MEM << idx_shift);
@@ -636,7 +730,7 @@ static int _cache(uintptr_t *pte)
     return err;
 }
 
-static int (*control_handler[MMU_CNTL_DUMMY_END])(uintptr_t *pte) = {
+static int (*control_handler[MMU_CNTL_DUMMY_END])(rt_ubase_t *pte) = {
     [MMU_CNTL_CACHE] = _cache,
     [MMU_CNTL_NONCACHE] = _noncache,
 };
@@ -646,17 +740,18 @@ int rt_hw_mmu_control(struct rt_aspace *aspace, void *vaddr, size_t size,
 {
     int level_shift;
     int err = -RT_EINVAL;
-    void *vend = vaddr + size;
+    rt_ubase_t vstart = (rt_ubase_t)vaddr;
+    rt_ubase_t vend = vstart + size;
 
-    int (*handler)(uintptr_t * pte);
+    int (*handler)(rt_ubase_t * pte);
     if (cmd >= 0 && cmd < MMU_CNTL_DUMMY_END)
     {
         handler = control_handler[cmd];
 
-        while (vaddr < vend)
+        while (vstart < vend)
         {
-            uintptr_t *pte = _query(aspace, vaddr, &level_shift);
-            void *range_end = vaddr + (1ul << level_shift);
+            rt_ubase_t *pte = _query(aspace, (void *)vstart, &level_shift);
+            rt_ubase_t range_end = vstart + (1ul << level_shift);
             RT_ASSERT(range_end <= vend);
 
             if (pte)
@@ -664,7 +759,7 @@ int rt_hw_mmu_control(struct rt_aspace *aspace, void *vaddr, size_t size,
                 err = handler(pte);
                 RT_ASSERT(err == RT_EOK);
             }
-            vaddr = range_end;
+            vstart = range_end;
         }
     }
     else

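To make the new dispatch in rt_hw_mmu_map above concrete: when both the virtual address and the size are 2 MiB aligned (ARCH_SECTION), mappings are installed as block entries by _kernel_map_2M; any misalignment of v_addr or size falls back to the per-page _kernel_map_4K path, and _kernel_map_2M itself still rejects a misaligned physical address. A sketch of a call that takes the block path; the addresses and the MMU_MAP_K_RWCB attribute macro are illustrative assumptions, not taken from this diff.

    /* v_addr, p_addr and size are all 2 MiB aligned, so rt_hw_mmu_map() installs
     * a single 2 MiB block entry instead of 512 individual 4 KiB entries. */
    void *va = rt_hw_mmu_map(&rt_kernel_space,
                             (void *)0xffffd00000200000UL, /* virtual, 2 MiB aligned  */
                             (void *)0x40200000UL,         /* physical, 2 MiB aligned */
                             0x200000,                     /* exactly one 2 MiB section */
                             MMU_MAP_K_RWCB);              /* cached kernel RW attribute */
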
+ 22 - 0
libcpu/aarch64/common/mmu.h

@@ -97,6 +97,28 @@ static inline void *rt_hw_mmu_tbl_get()
     return (void *)(tbl & ((1ul << 48) - 2));
 }
 
+static inline void *rt_hw_mmu_kernel_v2p(void *v_addr)
+{
+    rt_ubase_t par;
+    void *paddr;
+    asm volatile("at s1e1w, %0"::"r"(v_addr):"memory");
+    asm volatile("mrs %0, par_el1":"=r"(par)::"memory");
+
+    if (par & 0x1)
+    {
+        paddr = ARCH_MAP_FAILED;
+    }
+    else
+    {
+        #define MMU_ADDRESS_MASK 0x0000fffffffff000UL
+        par &= MMU_ADDRESS_MASK;
+        par |= (rt_ubase_t)v_addr & ARCH_PAGE_MASK;
+        paddr =  (void *)par;
+    }
+
+    return paddr;
+}
+
 int rt_hw_mmu_control(struct rt_aspace *aspace, void *vaddr, size_t size,
                       enum rt_mmu_cntl cmd);
 

+ 17 - 9
src/Kconfig

@@ -261,6 +261,12 @@ if RT_DEBUG
         int
         default 1 if RT_DEBUG_MEMHEAP_CONFIG
 
+    if ARCH_MM_MMU
+        config RT_DEBUG_PAGE_LEAK
+            bool "Enable page leaking tracer"
+            default n
+    endif
+
     config RT_DEBUG_MODULE_CONFIG
         bool "Enable debugging of Application Module"
         default n
@@ -305,15 +311,17 @@ endmenu
 
 menu "Memory Management"
 
-    config RT_PAGE_MAX_ORDER
-        int "Max order of pages allocatable by page allocator"
-        default 11
-        help
-            For example, A value of 11 means the maximum chunk of contiguous memory
-            allocatable by page system is 2^(11 + ARCH_PAGE_BITS - 1) Bytes.
-            Large memory requirement can consume all system resource, and should
-            consider reserved memory instead to enhance system endurance.
-            Max order should at least satisfied usage by huge page.
+    if ARCH_MM_MMU
+        config RT_PAGE_MAX_ORDER
+            int "Max order of pages allocatable by page allocator"
+            default 11
+            help
+                For example, A value of 11 means the maximum chunk of contiguous memory
+                allocatable by page system is 2^(11 + ARCH_PAGE_BITS - 1) Bytes.
+                Large memory requirement can consume all system resource, and should
+                consider reserved memory instead to enhance system endurance.
+                Max order should at least satisfied usage by huge page.
+    endif
 
     config RT_USING_MEMPOOL
         bool "Using memory pool"
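
As a worked instance of the RT_PAGE_MAX_ORDER help text above, assuming 4 KiB pages (ARCH_PAGE_BITS = 12): the default value of 11 caps a single contiguous allocation at 2^(11 + 12 - 1) = 2^22 bytes = 4 MiB, comfortably covering the 2 MiB huge pages used by the aarch64 block mappings in this change.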