feat: mm: added affinity pages allocator

This patch introduces a tagged page allocator to address the existing problem
of page aliasing on specific platforms (VIPT caches) and the requirement of
page coloring. It implements an affinity-id-aware page manager by splitting
the runtime page lists into two kinds: a plain singly linked list and a
multi-dimensional affinity list.

Changes:
- Introduced the tagged page allocator and the management algorithm for the affinity page list
- Modified components to support affinity-id list management
- Updated the page allocation and freeing routines to handle tagged pages
- Added configuration options for the page affinity block size and debugging
- Modified mmap and ELF loading to respect affinity settings
- Enhanced page list management to support the multi-dimensional affinity list

Signed-off-by: Shell <smokewood@qq.com>
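Before the per-file diffs, a minimal sketch of how the new allocator interface is meant to be used. The call site below is hypothetical, but rt_pages_alloc_tagged(), RT_PAGE_PICK_AFFID() and PAGE_ANY_AVAILABLE are exactly the symbols this patch introduces; the pattern mirrors the mm_object.c change further down:

    #include <mm_page.h>   /* rt_pages_alloc_tagged(), RT_PAGE_PICK_AFFID */

    /* Hypothetical fault path: back a virtual address with a page whose
     * "color" (offset inside the affinity block) matches that address, so
     * a VIPT cache never indexes the same frame two different ways. */
    static void *alloc_backing_page(void *fault_vaddr)
    {
        /* affinity id = page index of the address within its block */
        int affid = RT_PAGE_PICK_AFFID(fault_vaddr);

        /* order-0 (single page) allocation, any zone is acceptable */
        return rt_pages_alloc_tagged(0, affid, PAGE_ANY_AVAILABLE);
    }
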

+ 1 - 0
components/dfs/dfs_v2/include/dfs_file.h

@@ -137,6 +137,7 @@ struct dfs_mmap2_args
     int prot;
     int flags;
     off_t pgoffset;
+    size_t min_align_size;
 
     struct rt_lwp *lwp;
     void *ret;

+ 1 - 0
components/dfs/dfs_v2/src/dfs_file_mmap.c

@@ -76,6 +76,7 @@ static void *_map_data_to_uspace(struct dfs_mmap2_args *mmap2, void *data, rt_er
         map_vaddr = (void *)((size_t)map_vaddr & ~ARCH_PAGE_MASK);
 
         k_flags = lwp_user_mm_flag_to_kernel(mmap2->flags);
+        k_flags = MMF_CREATE(k_flags, mmap2->min_align_size);
         k_attr = lwp_user_mm_attr_to_kernel(mmap2->prot);
 
         map_vaddr = _do_mmap(lwp, map_vaddr, map_size, k_attr, k_flags, mmap2->pgoffset, data, code);

+ 5 - 4
components/dfs/dfs_v2/src/dfs_pcache.c

@@ -710,14 +710,15 @@ static int dfs_page_unmap(struct dfs_page *page)
     return 0;
 }
 
-static struct dfs_page *dfs_page_create(void)
+static struct dfs_page *dfs_page_create(off_t pos)
 {
     struct dfs_page *page = RT_NULL;
+    int affid = RT_PAGE_PICK_AFFID(pos);
 
     page = rt_calloc(1, sizeof(struct dfs_page));
     if (page)
     {
-        page->page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);
+        page->page = rt_pages_alloc_tagged(0, affid, PAGE_ANY_AVAILABLE);
         if (page->page)
         {
             //memset(page->page, 0x00, ARCH_PAGE_SIZE);
@@ -1008,12 +1009,12 @@ static struct dfs_page *dfs_aspace_load_page(struct dfs_file *file, off_t pos)
         struct dfs_vnode *vnode = file->vnode;
         struct dfs_aspace *aspace = vnode->aspace;
 
-        page = dfs_page_create();
+        page = dfs_page_create(pos);
         if (page)
         {
            page->aspace = aspace;
            page->size = ARCH_PAGE_SIZE;
-            page->fpos = pos / ARCH_PAGE_SIZE * ARCH_PAGE_SIZE;
+            page->fpos = RT_ALIGN_DOWN(pos, ARCH_PAGE_SIZE);
            aspace->ops->read(file, page);
            page->ref_count ++;
 

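A note on the dfs_pcache change above: tagging the page-cache frame with the file position means a frame's color always matches the file offset it caches. A worked example of RT_PAGE_PICK_AFFID() on file offsets, assuming an illustrative RT_PAGE_AFFINITY_BLOCK_SIZE of 0x4000 (4 colors) and 4 KiB pages:

    /* RT_PAGE_PICK_AFFID(pos) == (pos & 0x3fff) / 0x1000
     * pos = 0x0000 -> affid 0
     * pos = 0x1000 -> affid 1
     * pos = 0x5000 -> affid 1   (0x5000 & 0x3fff == 0x1000)
     * pos = 0xe000 -> affid 2   (0xe000 & 0x3fff == 0x2000)
     * Any MAP_FIXED user mapping of these offsets at a block-aligned
     * address then observes matching VA and PA colors. */
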
+ 1 - 1
components/lwp/lwp_elf.c

@@ -576,7 +576,7 @@ static int elf_aux_fill(elf_load_info_t *load_info)
     elf_addr_t *aux_info;
     uint32_t random_value = rt_tick_get();
     size_t prot = PROT_READ | PROT_WRITE;
-    size_t flags = MAP_PRIVATE;
+    size_t flags = MAP_FIXED | MAP_PRIVATE;
     rt_lwp_t lwp = load_info->lwp;
     void *va;
 

+ 87 - 8
components/lwp/lwp_user_mm.c

@@ -503,29 +503,99 @@ void *lwp_user_memory_remap_to_kernel(rt_lwp_t lwp, void *uaddr, size_t length)
 
     return kaddr;
 }
+#include <dfs_dentry.h>
+#define _AFFBLK_PGOFFSET (RT_PAGE_AFFINITY_BLOCK_SIZE >> MM_PAGE_SHIFT)
+
+static rt_base_t _aligned_for_weak_mapping(off_t *ppgoff, rt_size_t *plen, rt_size_t *palign)
+{
+    off_t aligned_pgoffset, pgoffset = *ppgoff;
+    rt_size_t length = *plen;
+    rt_size_t min_align_size = *palign;
+    rt_base_t aligned_size = 0;
+
+    if (pgoffset >= 0)
+    {
+        /* force an alignment */
+        aligned_pgoffset =
+            RT_ALIGN_DOWN(pgoffset, RT_PAGE_AFFINITY_BLOCK_SIZE >> MM_PAGE_SHIFT);
+        aligned_size = (pgoffset - aligned_pgoffset) << MM_PAGE_SHIFT;
+
+        if (aligned_pgoffset != pgoffset)
+        {
+            /**
+             * If the requested pgoffset does not sit on an aligned page
+             * offset, expand the requested mapping to force an alignment.
+             */
+            length += aligned_size;
+            pgoffset = aligned_pgoffset;
+        }
+
+        /**
+         * As this is a weak mapping, we can pick any reasonable address for our
+         * requirement.
+         */
+        min_align_size = RT_PAGE_AFFINITY_BLOCK_SIZE;
+    }
+    else
+    {
+        RT_ASSERT(0 && "Unexpected input");
+    }
+
+    *ppgoff = pgoffset;
+    *plen = length;
+    *palign = min_align_size;
+
+    return aligned_size;
+}
 
 void *lwp_mmap2(struct rt_lwp *lwp, void *addr, size_t length, int prot,
                 int flags, int fd, off_t pgoffset)
 {
     rt_err_t rc;
-    rt_size_t k_attr;
-    rt_size_t k_flags;
-    rt_size_t k_offset;
+    rt_size_t k_attr, k_flags, k_offset, aligned_size = 0;
+    rt_size_t min_align_size = 1 << MM_PAGE_SHIFT;
     rt_aspace_t uspace;
     rt_mem_obj_t mem_obj;
     void *ret = 0;
-    LOG_D("%s(addr=0x%lx,length=%ld,fd=%d)", __func__, addr, length, fd);
+    LOG_D("%s(addr=0x%lx,length=0x%lx,fd=%d,pgoff=0x%lx)", __func__, addr, length, fd, pgoffset);
+
+    /* alignment for the affinity page block */
+    if (flags & MAP_FIXED)
+    {
+        if (fd != -1)
+        {
+            /* requested mapping address */
+            rt_base_t va_affid = RT_PAGE_PICK_AFFID(addr);
+            rt_base_t pgoff_affid = RT_PAGE_PICK_AFFID(pgoffset << MM_PAGE_SHIFT);
+
+            /* filter illegally aligned addresses */
+            if (va_affid != pgoff_affid)
+            {
+                LOG_W("Unaligned mapping address %p(pgoff=0x%lx) from fd=%d",
+                    addr, pgoffset, fd);
+            }
+        }
+        else
+        {
+            /* anonymous mappings can always be aligned */
+        }
+    }
+    else
+    {
+        /* weak address selection */
+        aligned_size = _aligned_for_weak_mapping(&pgoffset, &length, &min_align_size);
+    }
 
     if (fd == -1)
     {
-        /**
-         * todo: add threshold
-         */
+    #ifdef RT_DEBUGGING_PAGE_THRESHOLD
         if (!_memory_threshold_ok())
             return (void *)-ENOMEM;
+    #endif /* RT_DEBUGGING_PAGE_THRESHOLD */
 
         k_offset = MM_PA_TO_OFF(addr);
-        k_flags = lwp_user_mm_flag_to_kernel(flags) | MMF_MAP_PRIVATE;
+        k_flags = MMF_CREATE(lwp_user_mm_flag_to_kernel(flags) | MMF_MAP_PRIVATE,
+                             min_align_size);
         k_attr = lwp_user_mm_attr_to_kernel(prot);
 
         uspace = lwp->aspace;
@@ -553,6 +623,7 @@ void *lwp_mmap2(struct rt_lwp *lwp, void *addr, size_t length, int prot,
 
             mmap2.addr = addr;
             mmap2.length = length;
+            mmap2.min_align_size = min_align_size;
             mmap2.prot = prot;
             mmap2.flags = flags;
             mmap2.pgoffset = pgoffset;
@@ -572,7 +643,15 @@ void *lwp_mmap2(struct rt_lwp *lwp, void *addr, size_t length, int prot,
     }
 
     if ((long)ret <= 0)
+    {
         LOG_D("%s() => %ld", __func__, ret);
+    }
+    else
+    {
+        ret = (char *)ret + aligned_size;
+        LOG_D("%s() => 0x%lx", __func__, ret);
+    }
+
     return ret;
 }
 

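To trace the weak-mapping path in lwp_mmap2() above, a worked example under assumed values (MM_PAGE_SHIFT = 12 and RT_PAGE_AFFINITY_BLOCK_SIZE = 0x4000, i.e. 4 pages per block):

    /* mmap(NULL, 0x2000, ..., fd, pgoffset = 6): no MAP_FIXED, so
     * _aligned_for_weak_mapping() rewrites the request:
     *
     *   aligned_pgoffset = RT_ALIGN_DOWN(6, 4)  = 4
     *   aligned_size     = (6 - 4) << 12        = 0x2000
     *   length           = 0x2000 + 0x2000      = 0x4000
     *   pgoffset         = 4
     *   min_align_size   = 0x4000
     *
     * The enlarged, block-aligned mapping is created, and lwp_mmap2()
     * returns ret + aligned_size: the address of the page that backs
     * the originally requested pgoffset = 6. */
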
+ 12 - 0
components/mm/Kconfig

@@ -1,5 +1,13 @@
 menu "Memory management"
 
+config RT_PAGE_AFFINITY_BLOCK_SIZE
+    hex "Affinity block size in bytes for page management"
+    default 0x1000
+    help
+        Page affinity block can be used to resolve the VIPT aliasing problem.
+        It should be set to `1ul << ((index + block) - page_offset)` in this case.
+        You could also exploit this as a tuning knob for cache coloring.
+
 config RT_USING_MEMBLOCK
     bool "Using memblock"
     default n
@@ -16,4 +24,8 @@ config RT_INIT_MEMORY_REGIONS
         memory into different types of regions. This variable specifies
         the maximum number of regions supported by the system.
 
+config RT_DEBUGGING_ALIASING
+    bool "Using aliasing paging debugger"
+    default n
+
 endmenu

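A hedged sizing example for RT_PAGE_AFFINITY_BLOCK_SIZE (the cache parameters are illustrative, not taken from any SoC targeted by this patch):

    /* 32 KiB 4-way set-associative VIPT D-cache, 64 B lines, 4 KiB pages:
     *   way size    = 32 KiB / 4 = 8 KiB   -> index + block = 13 bits
     *   page offset = 12 bits              -> 1ul << (13 - 12) = 2 colors
     *   block size  = 2 pages * 0x1000     = 0x2000 bytes
     * The default 0x1000 (exactly one page, a single color) leaves the
     * affinity machinery effectively disabled on non-aliasing caches. */
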
+ 6 - 4
components/mm/ioremap.c

@@ -38,13 +38,15 @@ static void *_ioremap_type(void *paddr, size_t size, enum ioremap_type type)
     size_t attr;
     size_t lo_off;
     int err;
+    size_t pa_off = (rt_ubase_t)paddr & ~(RT_PAGE_AFFINITY_BLOCK_SIZE - 1);
 
-    lo_off = (rt_ubase_t)paddr & ARCH_PAGE_MASK;
+    lo_off = (rt_ubase_t)paddr - pa_off;
+    pa_off = MM_PA_TO_OFF(pa_off);
 
     struct rt_mm_va_hint hint = {
         .prefer = RT_NULL,
-        .map_size = RT_ALIGN(size + lo_off, ARCH_PAGE_SIZE),
-        .flags = 0,
+        .map_size = RT_ALIGN(size + lo_off, RT_PAGE_AFFINITY_BLOCK_SIZE),
+        .flags = MMF_CREATE(0, RT_PAGE_AFFINITY_BLOCK_SIZE),
         .limit_start = rt_ioremap_start,
         .limit_range_size = rt_ioremap_size,
     };
@@ -63,7 +65,7 @@ static void *_ioremap_type(void *paddr, size_t size, enum ioremap_type type)
     default:
         return v_addr;
     }
-    err = rt_aspace_map_phy(&rt_kernel_space, &hint, attr, MM_PA_TO_OFF(paddr), (void **)&v_addr);
+    err = rt_aspace_map_phy(&rt_kernel_space, &hint, attr, pa_off, (void **)&v_addr);
 
     if (err)
     {

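Tracing the ioremap change with made-up numbers (RT_PAGE_AFFINITY_BLOCK_SIZE = 0x4000 assumed):

    /* _ioremap_type(paddr = 0x40001234, size = 0x100):
     *   pa_off (base) = 0x40001234 & ~0x3fff             = 0x40000000
     *   lo_off        = 0x40001234 - 0x40000000          = 0x1234
     *   map_size      = RT_ALIGN(0x100 + 0x1234, 0x4000) = 0x4000
     *   hint.flags    = MMF_CREATE(0, 0x4000)  -> block-aligned VA
     * v_addr + lo_off then carries the same offset within the affinity
     * block as the physical address, keeping the kernel mapping
     * color-consistent with identity-colored user mappings. */
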
+ 1 - 1
components/mm/mm_anon.c

@@ -581,7 +581,7 @@ int rt_varea_fix_private_locked(rt_varea_t ex_varea, void *pa,
         }
         else if (ex_obj->page_read)
         {
-            page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);
+            page = rt_pages_alloc_tagged(0, RT_PAGE_PICK_AFFID(fault_vaddr), PAGE_ANY_AVAILABLE);
             if (page)
             {
                 /** setup message & fetch the data from source object */

+ 49 - 20
components/mm/mm_aspace.c

@@ -1149,12 +1149,17 @@ static void *_ascending_search(rt_varea_t varea, rt_size_t req_size,
         rt_varea_t nx_va = ASPACE_VAREA_NEXT(varea);
         if (nx_va)
         {
-            rt_size_t gap_size =
-                (char *)_lower(limit.end, (char *)nx_va->start - 1) - candidate + 1;
-            if (gap_size >= req_size)
+            if (candidate < (char *)nx_va->start)
             {
-                ret = candidate;
-                break;
+                rt_size_t gap_size =
+                    (char *)_lower(limit.end, (char *)nx_va->start - 1) -
+                    candidate + 1;
+
+                if (gap_size >= req_size)
+                {
+                    ret = candidate;
+                    break;
+                }
             }
         }
         else
@@ -1172,15 +1177,16 @@ static void *_find_head_and_asc_search(rt_aspace_t aspace, rt_size_t req_size,
                                        struct _mm_range limit)
 {
     void *va = RT_NULL;
+    char *candidate = _align(limit.start, align_mask);
 
-    rt_varea_t varea = _aspace_bst_search_exceed(aspace, limit.start);
+    rt_varea_t varea = _aspace_bst_search_exceed(aspace, candidate);
     if (varea)
     {
-        char *candidate = _align(limit.start, align_mask);
         rt_size_t gap_size = (char *)varea->start - candidate;
         if (gap_size >= req_size)
         {
-            rt_varea_t former = _aspace_bst_search(aspace, limit.start);
+            /* try the previous memory region of varea if possible */
+            rt_varea_t former = ASPACE_VAREA_PREV(varea);
             if (former)
             {
                 candidate = _align((char *)former->start + former->size, align_mask);
@@ -1203,12 +1209,7 @@ static void *_find_head_and_asc_search(rt_aspace_t aspace, rt_size_t req_size,
     }
     else
     {
-        char *candidate;
-        rt_size_t gap_size;
-
-        candidate = limit.start;
-        candidate = _align(candidate, align_mask);
-        gap_size = (char *)limit.end - candidate + 1;
+        rt_size_t gap_size = (char *)limit.end - candidate + 1;
 
         if (gap_size >= req_size)
             va = candidate;
@@ -1217,6 +1218,12 @@ static void *_find_head_and_asc_search(rt_aspace_t aspace, rt_size_t req_size,
     return va;
 }
 
+/**
+ * Find a memory region that:
+ * - is free
+ * - sits inside the limit range
+ * - meets the alignment requirement
+ */
 static void *_find_free(rt_aspace_t aspace, void *prefer, rt_size_t req_size,
                         void *limit_start, rt_size_t limit_size,
                         mm_flag_t flags)
@@ -1231,20 +1238,42 @@ static void *_find_free(rt_aspace_t aspace, void *prefer, rt_size_t req_size,
         align_mask = ~((1 << MMF_GET_ALIGN(flags)) - 1);
     }
 
-    if (prefer != RT_NULL)
+    if (flags & MMF_MAP_FIXED)
     {
-        /* if prefer and free, just return the prefer region */
-        prefer = _align(prefer, align_mask);
         struct _mm_range range = {prefer, (char *)prefer + req_size - 1};
-        varea = _aspace_bst_search_overlap(aspace, range);
 
+        /* the caller should guarantee that the requested region is legal */
+        RT_ASSERT(!_not_in_range(flags, prefer, req_size, limit_start, limit_size));
+
+        varea = _aspace_bst_search_overlap(aspace, range);
         if (!varea)
         {
             va = prefer;
         }
-        else if (flags & MMF_MAP_FIXED)
+        else
+        {
+            /* the region is not free */
+        }
+    }
+    else if (prefer != RT_NULL)
+    {
+        struct _mm_range range;
+
+        /* ceil the preferred address */
+        prefer = _align(prefer, align_mask);
+        if (_not_in_range(flags, prefer, req_size, limit_start, limit_size))
         {
-            /* OVERLAP */
+            prefer = limit_start;
+        }
+
+        range.start = prefer;
+        range.end = (char *)prefer + req_size - 1;
+        varea = _aspace_bst_search_overlap(aspace, range);
+
+        if (!varea)
+        {
+            /* if preferred and free, just return the preferred region */
+            va = prefer;
         }
         else
         {

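A sketch of how a caller would request the alignment that _find_free() now honors. This is illustrative only: the attr value and the exact rt_aspace_map() signature should be checked against the mm headers of the target tree:

    /* ask for a kernel region whose start is aligned to the affinity
     * block; MMF_CREATE() packs the alignment into mm_flag_t, and
     * _find_free() ceils every candidate address accordingly */
    static void *map_block_aligned(rt_size_t length, rt_size_t attr)
    {
        void *va = RT_NULL;
        mm_flag_t flags = MMF_CREATE(0, RT_PAGE_AFFINITY_BLOCK_SIZE);

        int err = rt_aspace_map(&rt_kernel_space, &va, length, attr,
                                flags, &rt_mm_dummy_mapper, 0);
        return err ? RT_NULL : va;
    }
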
+ 8 - 1
components/mm/mm_fault.c

@@ -185,15 +185,22 @@ int rt_aspace_fault_try_fix(rt_aspace_t aspace, struct rt_aspace_fault_msg *msg)
                 case MM_FAULT_OP_EXECUTE:
                     err = _exec_fault(varea, pa, msg);
                     break;
+                default:
+                    LOG_D("Unhandled exception");
+                    break;
                 }
             }
         }
         else
         {
-            LOG_I("%s: varea not found at 0x%lx", __func__, msg->fault_vaddr);
+            LOG_W("%s: varea not found at 0x%lx", __func__, msg->fault_vaddr);
         }
         RD_UNLOCK(aspace);
     }
+    else
+    {
+        LOG_W("No aspace found");
+    }
 
     return err;
 }

+ 2 - 1
components/mm/mm_object.c

@@ -34,7 +34,8 @@ static const char *get_name(rt_varea_t varea)
 static void on_page_fault(struct rt_varea *varea, struct rt_aspace_fault_msg *msg)
 {
     void *page;
-    page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);
+    int affid = RT_PAGE_PICK_AFFID(msg->fault_vaddr);
+    page = rt_pages_alloc_tagged(0, affid, PAGE_ANY_AVAILABLE);
 
     if (!page)
     {

+ 429 - 169
components/mm/mm_page.c

@@ -10,6 +10,7 @@
  *                             page management algorithm
  * 2023-02-20     WangXiaoyao  Multi-list page-management
  * 2023-11-28     Shell        Bugs fix for page_install on shadow region
+ * 2024-06-18     Shell        Added affinity page management for page coloring.
  */
 #include <rtthread.h>
 
@@ -41,14 +42,41 @@ static void *init_mpr_cont_start;
 
 static struct rt_varea mpr_varea;
 
-static struct rt_page *page_list_low[RT_PAGE_MAX_ORDER];
-static struct rt_page *page_list_high[RT_PAGE_MAX_ORDER];
+typedef union
+{
+    struct rt_page *page_list;
+    rt_ubase_t aff_page_map;
+} pgls_agr_t;
+
+#define PGLS_IS_AFF_MAP(pgls) ((pgls).aff_page_map & 0x1)
+#define PGLS_FROM_AFF_MAP(pgls, aff_map) \
+    ((pgls).aff_page_map = (-(rt_ubase_t)(aff_map)) | 0x1)
+#define PGLS_GET_AFF_MAP(pgls) \
+    ((struct rt_page **)-((pgls).aff_page_map & ~0x1))
+#define PGLS_GET(pgls) \
+    (PGLS_IS_AFF_MAP(pgls) ? PGLS_GET_AFF_MAP(pgls) : (pgls).page_list)
+#define PAGE_TO_AFFID(page) (RT_PAGE_PICK_AFFID(page_to_paddr(page)))
+
+/* affinity id */
+#define AFFID_BLK_BITS \
+    ((sizeof(int) * 8 - 1) - __builtin_clz(RT_PAGE_AFFINITY_BLOCK_SIZE) - ARCH_PAGE_SHIFT)
+#define AFFID_NUMOF_ID_IN_SET(order) \
+    ((RT_PAGE_AFFINITY_BLOCK_SIZE / ARCH_PAGE_SIZE) / (1ul << (order)))
+#define AFFID_BITS_MASK(order) \
+    (((1 << AFFID_BLK_BITS) - 1) - ((1 << (order)) - 1))
+
+static pgls_agr_t page_list_low[RT_PAGE_MAX_ORDER];
+static rt_page_t
+    aff_pglist_low[AFFID_NUMOF_ID_IN_SET(0) * 2 - 2];
+static pgls_agr_t page_list_high[RT_PAGE_MAX_ORDER];
+static rt_page_t
+    aff_pglist_high[AFFID_NUMOF_ID_IN_SET(0) * 2 - 2];
 static RT_DEFINE_SPINLOCK(_spinlock);
 
 #define page_start ((rt_page_t)rt_mpr_start)
 
-static rt_size_t page_nr;
-static rt_size_t _high_pages_nr;
+static rt_size_t _page_nr, _freed_nr, _freed_nr_hi;
+static rt_size_t _page_nr_hi;
 static rt_size_t early_offset;
 
 static const char *get_name(rt_varea_t varea)
@@ -64,11 +92,13 @@ static void hint_free(rt_mm_va_hint_t hint)
     hint->prefer = rt_mpr_start;
 }
 
-static void on_page_fault(struct rt_varea *varea, struct rt_aspace_fault_msg *msg)
+static void on_page_fault(struct rt_varea *varea,
+                          struct rt_aspace_fault_msg *msg)
 {
     char *init_start = (void *)init_mpr_align_start;
     char *init_end = (void *)init_mpr_align_end;
-    if ((char *)msg->fault_vaddr < init_end && (char *)msg->fault_vaddr >= init_start)
+    if ((char *)msg->fault_vaddr < init_end &&
+        (char *)msg->fault_vaddr >= init_start)
     {
         rt_size_t offset = (char *)msg->fault_vaddr - init_start;
         msg->response.status = MM_FAULT_STATUS_OK;
@@ -207,6 +237,11 @@ static inline void *page_to_addr(rt_page_t page)
     return (void *)(((page - page_start) << ARCH_PAGE_SHIFT) - PV_OFFSET);
 }
 
+static inline rt_ubase_t page_to_paddr(rt_page_t page)
+{
+    return (rt_ubase_t)((page - page_start) << ARCH_PAGE_SHIFT);
+}
+
 static inline rt_page_t addr_to_page(rt_page_t pg_start, void *addr)
 {
     addr = (char *)addr + PV_OFFSET;
@@ -261,7 +296,7 @@ void *rt_page_page2addr(struct rt_page *p)
 }
 
 static inline struct rt_page *_buddy_get(struct rt_page *p,
-                                        rt_uint32_t size_bits)
+                                         rt_uint32_t size_bits)
 {
     rt_size_t addr;
 
@@ -270,7 +305,60 @@ static inline struct rt_page *_buddy_get(struct rt_page *p,
     return rt_page_addr2page((void *)addr);
 }
 
-static void _page_remove(rt_page_t page_list[], struct rt_page *p, rt_uint32_t size_bits)
+static rt_page_t *_get_pgls_head_by_page(pgls_agr_t *agr_pgls, rt_page_t page,
+                                         rt_uint32_t size_bits)
+{
+    rt_page_t *pgls_head;
+    int index;
+
+    if (size_bits < AFFID_BLK_BITS)
+    {
+        index = PAGE_TO_AFFID(page) >> size_bits;
+        RT_ASSERT(index < AFFID_NUMOF_ID_IN_SET(size_bits));
+
+        RT_ASSERT(PGLS_IS_AFF_MAP(agr_pgls[size_bits]));
+        pgls_head = &PGLS_GET_AFF_MAP(agr_pgls[size_bits])[index];
+    }
+    else
+    {
+        RT_ASSERT(!PGLS_IS_AFF_MAP(agr_pgls[size_bits]));
+        pgls_head = &agr_pgls[size_bits].page_list;
+    }
+
+    return pgls_head;
+}
+
+static rt_page_t *_get_pgls_head(pgls_agr_t *agr_pgls, int affid,
+                                 rt_uint32_t size_bits)
+{
+    rt_page_t *pgls_head;
+    int index;
+
+    if (size_bits < AFFID_BLK_BITS)
+    {
+        index = affid >> size_bits;
+        RT_ASSERT(index < AFFID_NUMOF_ID_IN_SET(size_bits));
+
+        RT_ASSERT(PGLS_IS_AFF_MAP(agr_pgls[size_bits]));
+        pgls_head = &PGLS_GET_AFF_MAP(agr_pgls[size_bits])[index];
+    }
+    else
+    {
+        RT_ASSERT(!PGLS_IS_AFF_MAP(agr_pgls[size_bits]));
+        pgls_head = &agr_pgls[size_bits].page_list;
+    }
+
+    return pgls_head;
+}
+
+static void _page_alloc(struct rt_page *p)
+{
+    p->size_bits = ARCH_ADDRESS_WIDTH_BITS;
+    p->ref_cnt = 1;
+}
+
+static void _page_remove(rt_page_t *page_head, struct rt_page *p,
+                         rt_uint32_t size_bits)
 {
     if (p->pre)
     {
@@ -278,7 +366,7 @@ static void _page_remove(rt_page_t page_list[], struct rt_page *p, rt_uint32_t s
     }
     else
     {
-        page_list[size_bits] = p->next;
+        *page_head = p->next;
     }
 
     if (p->next)
@@ -286,18 +374,19 @@ static void _page_remove(rt_page_t page_list[], struct rt_page *p, rt_uint32_t s
         p->next->pre = p->pre;
     }
 
-    p->size_bits = ARCH_ADDRESS_WIDTH_BITS;
+    _page_alloc(p);
 }
 
-static void _page_insert(rt_page_t page_list[], struct rt_page *p, rt_uint32_t size_bits)
+static void _page_insert(rt_page_t *page_head, struct rt_page *p,
+                         rt_uint32_t size_bits)
 {
-    p->next = page_list[size_bits];
+    p->next = *page_head;
     if (p->next)
     {
         p->next->pre = p;
     }
     p->pre = 0;
-    page_list[size_bits] = p;
+    *page_head = p;
     p->size_bits = size_bits;
 }
 
@@ -328,7 +417,8 @@ static int _pages_ref_get(struct rt_page *p, rt_uint32_t size_bits)
     return page_head->ref_cnt;
 }
 
-static int _pages_free(rt_page_t page_list[], struct rt_page *p, rt_uint32_t size_bits)
+static int _pages_free(pgls_agr_t page_list[], struct rt_page *p,
+                       rt_uint32_t size_bits)
 {
     rt_uint32_t level = size_bits;
     struct rt_page *buddy;
@@ -351,7 +441,8 @@ static int _pages_free(rt_page_t page_list[], struct rt_page *p, rt_uint32_t siz
         buddy = _buddy_get(p, level);
         if (buddy && buddy->size_bits == level)
         {
-            _page_remove(page_list, buddy, level);
+            _page_remove(_get_pgls_head_by_page(page_list, buddy, level),
+                         buddy, level);
             p = (p < buddy) ? p : buddy;
             level++;
         }
@@ -360,26 +451,38 @@ static int _pages_free(rt_page_t page_list[], struct rt_page *p, rt_uint32_t siz
             break;
         }
     }
-    _page_insert(page_list, p, level);
+
+    _page_insert(_get_pgls_head_by_page(page_list, p, level),
+                 p, level);
     return 1;
 }
 
-static struct rt_page *_pages_alloc(rt_page_t page_list[], rt_uint32_t size_bits)
+static struct rt_page *__pages_alloc(
+    pgls_agr_t agr_pgls[], rt_uint32_t size_bits, int affid,
+    void (*page_remove)(rt_page_t *page_head, struct rt_page *p,
+                        rt_uint32_t size_bits),
+    void (*page_insert)(rt_page_t *page_head, struct rt_page *p,
+                        rt_uint32_t size_bits),
+    void (*page_alloc)(rt_page_t page))
 {
-    struct rt_page *p;
+    rt_page_t *pgls_head = _get_pgls_head(agr_pgls, affid, size_bits);
+    rt_page_t p = *pgls_head;
 
-    if (page_list[size_bits])
+    if (p)
     {
-        p = page_list[size_bits];
-        _page_remove(page_list, p, size_bits);
+        page_remove(pgls_head, p, size_bits);
    }
    else
    {
        rt_uint32_t level;
+        rt_page_t head;
 
+        /* fallback for allocation */
         for (level = size_bits + 1; level < RT_PAGE_MAX_ORDER; level++)
         {
-            if (page_list[level])
+            pgls_head = _get_pgls_head(agr_pgls, affid, level);
+            p = *pgls_head;
+            if (p)
             {
                 break;
             }
@@ -389,21 +492,47 @@ static struct rt_page *_pages_alloc(rt_page_t page_list[], rt_uint32_t size_bits
             return 0;
         }
 
-        p = page_list[level];
-        _page_remove(page_list, p, level);
+        page_remove(pgls_head, p, level);
+
+        /* pick the page that satisfies the affinity tag */
+        head = p;
+        p = head + (affid - (affid & AFFID_BITS_MASK(level)));
+        page_alloc(p);
+
+        /* release the pages the caller doesn't need */
         while (level > size_bits)
         {
-            _page_insert(page_list, p, level - 1);
-            p = _buddy_get(p, level - 1);
-            level--;
+            long lower_bits = level - 1;
+            rt_page_t middle = _buddy_get(head, lower_bits);
+            if (p >= middle)
+            {
+                page_insert(
+                    _get_pgls_head_by_page(agr_pgls, head, lower_bits),
+                    head, lower_bits);
+                head = middle;
+            }
+            else
+            {
+                page_insert(
+                    _get_pgls_head_by_page(agr_pgls, middle, lower_bits),
+                    middle, lower_bits);
+            }
+            level = lower_bits;
         }
     }
-    p->size_bits = ARCH_ADDRESS_WIDTH_BITS;
-    p->ref_cnt = 1;
+
     return p;
 }
 
-static void _early_page_remove(rt_page_t page_list[], rt_page_t page, rt_uint32_t size_bits)
+static struct rt_page *_pages_alloc(pgls_agr_t page_list[],
+                                    rt_uint32_t size_bits, int affid)
+{
+    return __pages_alloc(page_list, size_bits, affid, _page_remove,
+                         _page_insert, _page_alloc);
+}
+
+static void _early_page_remove(rt_page_t *pgls_head, rt_page_t page,
+                               rt_uint32_t size_bits)
 {
     rt_page_t page_cont = (rt_page_t)((char *)page + early_offset);
     if (page_cont->pre)
@@ -413,7 +542,7 @@ static void _early_page_remove(rt_page_t page_list[], rt_page_t page, rt_uint32_
     }
     else
     {
-        page_list[size_bits] = page_cont->next;
+        *pgls_head = page_cont->next;
     }
 
     if (page_cont->next)
@@ -422,70 +551,47 @@ static void _early_page_remove(rt_page_t page_list[], rt_page_t page, rt_uint32_
         next_cont->pre = page_cont->pre;
     }
 
+    RT_ASSERT(page_cont->size_bits == size_bits);
+    page_cont->size_bits = ARCH_ADDRESS_WIDTH_BITS;
+    page_cont->ref_cnt = 1;
+}
+
+static void _early_page_alloc(rt_page_t page)
+{
+    rt_page_t page_cont = (rt_page_t)((char *)page + early_offset);
     page_cont->size_bits = ARCH_ADDRESS_WIDTH_BITS;
+    page_cont->ref_cnt = 1;
 }
 
-static void _early_page_insert(rt_page_t page_list[], rt_page_t page, int size_bits)
+static void _early_page_insert(rt_page_t *pgls_head, rt_page_t page,
+                               rt_uint32_t size_bits)
 {
     RT_ASSERT((void *)page >= rt_mpr_start &&
               ((char *)page - (char *)rt_mpr_start) < rt_mpr_size);
     rt_page_t page_cont = (rt_page_t)((char *)page + early_offset);
 
-    page_cont->next = page_list[size_bits];
+    page_cont->next = *pgls_head;
     if (page_cont->next)
     {
         rt_page_t next_cont = (rt_page_t)((char *)page_cont->next + early_offset);
         next_cont->pre = page;
     }
     page_cont->pre = 0;
-    page_list[size_bits] = page;
+    *pgls_head = page;
     page_cont->size_bits = size_bits;
 }
 
-static struct rt_page *_early_pages_alloc(rt_page_t page_list[], rt_uint32_t size_bits)
+static struct rt_page *_early_pages_alloc(pgls_agr_t page_list[],
+                                          rt_uint32_t size_bits, int affid)
 {
-    struct rt_page *p;
-
-    if (page_list[size_bits])
-    {
-        p = page_list[size_bits];
-        _early_page_remove(page_list, p, size_bits);
-    }
-    else
-    {
-        rt_uint32_t level;
-
-        for (level = size_bits + 1; level < RT_PAGE_MAX_ORDER; level++)
-        {
-            if (page_list[level])
-            {
-                break;
-            }
-        }
-        if (level == RT_PAGE_MAX_ORDER)
-        {
-            return 0;
-        }
-
-        p = page_list[level];
-        _early_page_remove(page_list, p, level);
-        while (level > size_bits)
-        {
-            _early_page_insert(page_list, p, level - 1);
-            p = _buddy_get(p, level - 1);
-            level--;
-        }
-    }
-    rt_page_t page_cont = (rt_page_t)((char *)p + early_offset);
-    page_cont->size_bits = ARCH_ADDRESS_WIDTH_BITS;
-    page_cont->ref_cnt = 1;
-    return p;
+    return __pages_alloc(page_list, size_bits, affid, _early_page_remove,
+                         _early_page_insert, _early_page_alloc);
 }
 
-static rt_page_t *_get_page_list(void *vaddr)
+static pgls_agr_t *_get_page_list(void *vaddr)
 {
     rt_ubase_t pa_int = (rt_ubase_t)vaddr + PV_OFFSET;
-    rt_page_t *list;
+    pgls_agr_t *list;
     if (pa_int > UINT32_MAX)
     {
         list = page_list_high;
@@ -521,14 +627,15 @@ void rt_page_ref_inc(void *addr, rt_uint32_t size_bits)
     rt_spin_unlock_irqrestore(&_spinlock, level);
 }
 
-static rt_page_t (*pages_alloc_handler)(rt_page_t page_list[], rt_uint32_t size_bits);
+static rt_page_t (*pages_alloc_handler)(pgls_agr_t page_list[],
+                                        rt_uint32_t size_bits, int affid);
 
 /* if not, we skip the finding on page_list_high */
 static size_t _high_page_configured = 0;
 
-static rt_page_t *_flag_to_page_list(size_t flags)
+static pgls_agr_t *_flag_to_page_list(size_t flags)
 {
-    rt_page_t *page_list;
+    pgls_agr_t *page_list;
     if (_high_page_configured && (flags & PAGE_ANY_AVAILABLE))
     {
         page_list = page_list_high;
@@ -540,15 +647,21 @@ static rt_page_t *_flag_to_page_list(size_t flags)
     return page_list;
 }
 
-rt_inline void *_do_pages_alloc(rt_uint32_t size_bits, size_t flags)
+volatile static rt_ubase_t _last_alloc;
+
+rt_inline void *_do_pages_alloc(rt_uint32_t size_bits, size_t flags, int affid)
 {
     void *alloc_buf = RT_NULL;
     struct rt_page *p;
     rt_base_t level;
-    rt_page_t *page_list = _flag_to_page_list(flags);
+    pgls_agr_t *page_list = _flag_to_page_list(flags);
 
     level = rt_spin_lock_irqsave(&_spinlock);
-    p = pages_alloc_handler(page_list, size_bits);
+    p = pages_alloc_handler(page_list, size_bits, affid);
+    if (p)
+    {
+        _freed_nr -= 1 << size_bits;
+    }
     rt_spin_unlock_irqrestore(&_spinlock, level);
 
     if (!p && page_list != page_list_low)
@@ -557,13 +670,19 @@ rt_inline void *_do_pages_alloc(rt_uint32_t size_bits, size_t flags)
         page_list = page_list_low;
 
         level = rt_spin_lock_irqsave(&_spinlock);
-        p = pages_alloc_handler(page_list, size_bits);
+        p = pages_alloc_handler(page_list, size_bits, affid);
+        if (p)
+        {
+            _freed_nr -= 1 << size_bits;
+            _freed_nr_hi -= 1 << size_bits;
+        }
        rt_spin_unlock_irqrestore(&_spinlock, level);
     }
 
     if (p)
     {
         alloc_buf = page_to_addr(p);
+        _last_alloc = (rt_ubase_t)alloc_buf;
 
         #ifdef RT_DEBUGING_PAGE_LEAK
             level = rt_spin_lock_irqsave(&_spinlock);
@@ -574,20 +693,70 @@ rt_inline void *_do_pages_alloc(rt_uint32_t size_bits, size_t flags)
     return alloc_buf;
 }
 
+rt_inline int _get_balanced_id(rt_uint32_t size_bits)
+{
+    rt_ubase_t last_alloc = (_last_alloc / RT_PAGE_AFFINITY_BLOCK_SIZE);
+    return (last_alloc + (1u << size_bits)) & AFFID_BITS_MASK(size_bits);
+}
+
+static void *_do_pages_alloc_noaff(rt_uint32_t size_bits, size_t flags)
+{
+    void *rc = RT_NULL;
+
+    if (size_bits < AFFID_BLK_BITS)
+    {
+        int try_affid = _get_balanced_id(size_bits);
+        size_t numof_id = AFFID_NUMOF_ID_IN_SET(size_bits);
+        size_t valid_affid_mask = numof_id - 1;
+
+        for (size_t i = 0; i < numof_id; i++, try_affid += 1 << size_bits)
+        {
+            rc = _do_pages_alloc(size_bits, flags, try_affid & valid_affid_mask);
+            if (rc)
+            {
+                break;
+            }
+        }
+    }
+    else
+    {
+        rc = _do_pages_alloc(size_bits, flags, 0);
+    }
+
+    if (!rc)
+    {
+        RT_ASSERT(0);
+    }
+    return rc;
+}
+
 void *rt_pages_alloc(rt_uint32_t size_bits)
 {
-    return _do_pages_alloc(size_bits, 0);
+    return _do_pages_alloc_noaff(size_bits, 0);
 }
 
 void *rt_pages_alloc_ext(rt_uint32_t size_bits, size_t flags)
 {
-    return _do_pages_alloc(size_bits, flags);
+    return _do_pages_alloc_noaff(size_bits, flags);
+}
+
+void *rt_pages_alloc_tagged(rt_uint32_t size_bits, long affid, size_t flags)
+{
+    rt_page_t current;
+
+    current = _do_pages_alloc(size_bits, flags, affid);
+    if (current && RT_PAGE_PICK_AFFID(current) != affid)
+    {
+        RT_ASSERT(0);
+    }
+
+    return current;
 }
 
 int rt_pages_free(void *addr, rt_uint32_t size_bits)
 {
     struct rt_page *p;
-    rt_page_t *page_list = _get_page_list(addr);
+    pgls_agr_t *page_list = _get_page_list(addr);
     int real_free = 0;
 
     p = rt_page_addr2page(addr);
@@ -597,117 +766,130 @@ int rt_pages_free(void *addr, rt_uint32_t size_bits)
         level = rt_spin_lock_irqsave(&_spinlock);
         real_free = _pages_free(page_list, p, size_bits);
         if (real_free)
+        {
+            _freed_nr += 1 << size_bits;
+            if (page_list == page_list_high)
+            {
+                _freed_nr_hi += 1 << size_bits;
+            }
             TRACE_FREE(p, size_bits);
+        }
         rt_spin_unlock_irqrestore(&_spinlock, level);
     }
 
     return real_free;
 }
 
-void rt_page_list(void) __attribute__((alias("list_page")));
+int rt_page_list(void) __attribute__((alias("list_page")));
 
-#define PGNR2SIZE(nr) ((nr) * ARCH_PAGE_SIZE / 1024)
+#define PGNR2SIZE(nr) ((nr)*ARCH_PAGE_SIZE / 1024)
 
-void list_page(void)
+static void _dump_page_list(int order, rt_page_t lp, rt_page_t hp,
+                            rt_size_t *pfree)
 {
-    int i;
     rt_size_t free = 0;
-    rt_size_t installed = page_nr;
 
-    rt_base_t level;
-    level = rt_spin_lock_irqsave(&_spinlock);
+    rt_kprintf("level %d ", order);
 
-    for (i = 0; i < RT_PAGE_MAX_ORDER; i++)
+    while (lp)
     {
-        struct rt_page *lp = page_list_low[i];
-        struct rt_page *hp = page_list_high[i];
-
-        rt_kprintf("level %d ", i);
-
-        while (lp)
-        {
-            free += (1UL << i);
-            rt_kprintf("[0x%08p]", rt_page_page2addr(lp));
-            lp = lp->next;
-        }
-        while (hp)
-        {
-            free += (1UL << i);
-            rt_kprintf("[0x%08p]", rt_page_page2addr(hp));
-            hp = hp->next;
-        }
-        rt_kprintf("\n");
+        free += (1UL << order);
+        rt_kprintf("[L:0x%08p]", rt_page_page2addr(lp));
+        lp = lp->next;
+    }
+    while (hp)
+    {
+        free += (1UL << order);
+        rt_kprintf("[H:0x%08p]", rt_page_page2addr(hp));
+        hp = hp->next;
     }
 
-    rt_spin_unlock_irqrestore(&_spinlock, level);
-    rt_kprintf("-------------------------------\n");
-    rt_kprintf("Page Summary:\n => free/installed: 0x%lx/0x%lx (%ld/%ld KB)\n", free, installed, PGNR2SIZE(free), PGNR2SIZE(installed));
-    rt_kprintf("-------------------------------\n");
+    rt_kprintf("\n");
+
+    *pfree += free;
 }
-MSH_CMD_EXPORT(list_page, show page info);
 
-void rt_page_get_info(rt_size_t *total_nr, rt_size_t *free_nr)
+int list_page(void)
 {
     int i;
-    rt_size_t total_free = 0;
-    rt_base_t level;
+    rt_size_t free = 0;
+    rt_size_t installed = _page_nr;
 
+    rt_base_t level;
     level = rt_spin_lock_irqsave(&_spinlock);
-    for (i = 0; i < RT_PAGE_MAX_ORDER; i++)
-    {
-        struct rt_page *p = page_list_low[i];
 
-        while (p)
+    /* dump affinity map area */
+    for (i = 0; i < AFFID_BLK_BITS; i++)
+    {
+        rt_page_t *iter_lo = PGLS_GET_AFF_MAP(page_list_low[i]);
+        rt_page_t *iter_hi = PGLS_GET_AFF_MAP(page_list_high[i]);
+        rt_size_t list_len = AFFID_NUMOF_ID_IN_SET(i);
+        for (size_t j = 0; j < list_len; j++)
         {
-            total_free += (1UL << i);
-            p = p->next;
+            _dump_page_list(i, iter_lo[j], iter_hi[j], &free);
         }
     }
-    for (i = 0; i < RT_PAGE_MAX_ORDER; i++)
+
+    /* dump normal page list */
+    for (; i < RT_PAGE_MAX_ORDER; i++)
     {
-        struct rt_page *p = page_list_high[i];
+        rt_page_t lp = page_list_low[i].page_list;
+        rt_page_t hp = page_list_high[i].page_list;
 
-        while (p)
-        {
-            total_free += (1UL << i);
-            p = p->next;
-        }
+        _dump_page_list(i, lp, hp, &free);
     }
+
     rt_spin_unlock_irqrestore(&_spinlock, level);
-    *total_nr = page_nr;
-    *free_nr = total_free;
+    rt_kprintf("-------------------------------\n");
+    rt_kprintf("Page Summary:\n => free/installed: 0x%lx/0x%lx (%ld/%ld KB)\n",
+               free, installed, PGNR2SIZE(free), PGNR2SIZE(installed));
+    rt_kprintf("-------------------------------\n");
+
+    return 0;
+}
+MSH_CMD_EXPORT(list_page, show page info);
+
+void rt_page_get_info(rt_size_t *total_nr, rt_size_t *free_nr)
+{
+    *total_nr = _page_nr;
+    *free_nr = _freed_nr;
 }
 
 void rt_page_high_get_info(rt_size_t *total_nr, rt_size_t *free_nr)
 {
-    int i;
-    rt_size_t total_free = 0;
-    rt_base_t level;
+    *total_nr = _page_nr_hi;
+    *free_nr = _freed_nr_hi;
+}
 
-    level = rt_spin_lock_irqsave(&_spinlock);
-    for (i = 0; i < RT_PAGE_MAX_ORDER; i++)
+static void _invalid_uninstalled_shadow(rt_page_t start, rt_page_t end)
+{
+    for (rt_page_t iter = start; iter < end; iter++)
     {
-        struct rt_page *p = page_list_high[i];
-
-        while (p)
+        rt_base_t frame = (rt_base_t)rt_page_page2addr(iter);
+        struct installed_page_reg *page_reg = _find_page_region(frame);
+        if (page_reg)
         {
-            total_free += (1UL << i);
-            p = p->next;
+            continue;
         }
+        iter->size_bits = ARCH_ADDRESS_WIDTH_BITS;
     }
-    rt_spin_unlock_irqrestore(&_spinlock, level);
-    *total_nr = _high_pages_nr;
-    *free_nr = total_free;
 }
 
-static void _install_page(rt_page_t mpr_head, rt_region_t region, void *insert_handler)
+static void _install_page(rt_page_t mpr_head, rt_region_t region,
+                          void (*insert)(rt_page_t *ppg, rt_page_t page, rt_uint32_t size_bits))
 {
-    void (*insert)(rt_page_t *page_list, rt_page_t page, int size_bits) = insert_handler;
+    pgls_agr_t *page_list;
+    rt_page_t *page_head;
     rt_region_t shadow;
+    const rt_base_t pvoffset = PV_OFFSET;
+
+    _page_nr += ((region.end - region.start) >> ARCH_PAGE_SHIFT);
+    _freed_nr += ((region.end - region.start) >> ARCH_PAGE_SHIFT);
+
     shadow.start = region.start & ~shadow_mask;
     shadow.end = FLOOR(region.end, shadow_mask + 1);
 
-    if (shadow.end + PV_OFFSET > UINT32_MAX)
+    if (shadow.end + pvoffset > UINT32_MAX)
         _high_page_configured = 1;
 
     rt_page_t shad_head = addr_to_page(mpr_head, (void *)shadow.start);
@@ -715,15 +897,9 @@ static void _install_page(rt_page_t mpr_head, rt_region_t region, void *insert_h
     rt_page_t head = addr_to_page(mpr_head, (void *)region.start);
     rt_page_t tail = addr_to_page(mpr_head, (void *)region.end);
 
-    /* mark shadow pages as illegal */
-    for (rt_page_t iter = shad_head; iter < head; iter++)
-    {
-        iter->size_bits = ARCH_ADDRESS_WIDTH_BITS;
-    }
-    for (rt_page_t iter = tail; iter < shad_tail; iter++)
-    {
-        iter->size_bits = ARCH_ADDRESS_WIDTH_BITS;
-    }
+    /* mark shadow page records that don't belong to another region as illegal */
+    _invalid_uninstalled_shadow(shad_head, head);
+    _invalid_uninstalled_shadow(tail, shad_tail);
 
     /* insert reserved pages to list */
     const int max_order = RT_PAGE_MAX_ORDER + ARCH_PAGE_SHIFT - 1;
@@ -732,6 +908,7 @@ static void _install_page(rt_page_t mpr_head, rt_region_t region, void *insert_h
         struct rt_page *p;
         int align_bits;
         int size_bits;
+        int page_order;
 
         size_bits =
             ARCH_ADDRESS_WIDTH_BITS - 1 - rt_hw_clz(region.end - region.start);
@@ -750,22 +927,83 @@ static void _install_page(rt_page_t mpr_head, rt_region_t region, void *insert_h
         p->ref_cnt = 0;
 
         /* insert to list */
-        rt_page_t *page_list = _get_page_list((void *)region.start);
+        page_list = _get_page_list((void *)region.start);
         if (page_list == page_list_high)
         {
-            _high_pages_nr += 1 << (size_bits - ARCH_PAGE_SHIFT);
+            _page_nr_hi += 1 << (size_bits - ARCH_PAGE_SHIFT);
+            _freed_nr_hi += 1 << (size_bits - ARCH_PAGE_SHIFT);
         }
-        insert(page_list, (rt_page_t)((char *)p - early_offset), size_bits - ARCH_PAGE_SHIFT);
+
+        page_order = size_bits - ARCH_PAGE_SHIFT;
+        page_head = _get_pgls_head_by_page(page_list, p, page_order);
+        insert(page_head, (rt_page_t)((char *)p - early_offset), page_order);
         region.start += (1UL << size_bits);
     }
 }
 
+static void *_aligned_to_affinity(rt_ubase_t head_page_pa, void *mapped_to)
+{
+#define AFFBLK_MASK (RT_PAGE_AFFINITY_BLOCK_SIZE - 1)
+    rt_ubase_t head_page_pg_aligned;
+    rt_ubase_t aligned_affblk_tag = (long)mapped_to & AFFBLK_MASK;
+
+    head_page_pg_aligned =
+        ((long)head_page_pa & ~AFFBLK_MASK) | aligned_affblk_tag;
+    if (head_page_pg_aligned < head_page_pa)
+    {
+        /* find the page forward */
+        head_page_pg_aligned += RT_PAGE_AFFINITY_BLOCK_SIZE;
+    }
+
+    return (void *)head_page_pg_aligned;
+}
+
+/* page management */
+static struct installed_page_reg
+{
+    rt_region_t region_area;
+    struct installed_page_reg *next;
+} _init_region;
+static _Atomic(struct installed_page_reg *) _head;
+
+static void _update_region_list(struct installed_page_reg *member)
+{
+    struct installed_page_reg *head;
+    do
+    {
+        head = rt_atomic_load(&_head);
+        member->next = head;
+    } while (!rt_atomic_compare_exchange_strong(&_head, &head, member));
+}
+
+rt_bool_t rt_page_is_member(rt_base_t page_pa)
+{
+    rt_bool_t rc = RT_FALSE;
+    rt_ubase_t page_va = page_pa - PV_OFFSET;
+    for (struct installed_page_reg *head = _head; head; head = head->next)
+    {
+        if (page_va >= head->region_area.start &&
+            page_va < head->region_area.end)
+        {
+            rc = RT_TRUE;
+            break;
+        }
+    }
+
+    return rc;
+}
+
 void rt_page_init(rt_region_t reg)
 {
     int i;
     rt_region_t shadow;
 
-    /* inclusive start, exclusive end */
+    /* setup install page status */
+    _init_region.region_area = reg;
+    _init_region.next = RT_NULL;
+    _head = &_init_region;
+
+    /* adjust install region. inclusive start, exclusive end */
     reg.start += ARCH_PAGE_MASK;
     reg.start &= ~ARCH_PAGE_MASK;
     reg.end &= ~ARCH_PAGE_MASK;
@@ -774,7 +1012,7 @@ void rt_page_init(rt_region_t reg)
         LOG_E("region end(%p) must greater than start(%p)", reg.start, reg.end);
         RT_ASSERT(0);
     }
-    page_nr = ((reg.end - reg.start) >> ARCH_PAGE_SHIFT);
+
     shadow.start = reg.start & ~shadow_mask;
     shadow.end = FLOOR(reg.end, shadow_mask + 1);
     LOG_D("[Init page] start: 0x%lx, end: 0x%lx, total: 0x%lx", reg.start,
@@ -783,10 +1021,21 @@ void rt_page_init(rt_region_t reg)
     int err;
 
     /* init free list */
-    for (i = 0; i < RT_PAGE_MAX_ORDER; i++)
+    rt_page_t *aff_pgls_iter_lo = aff_pglist_low;
+    rt_page_t *aff_pgls_iter_hi = aff_pglist_high;
+    for (i = 0; i < AFFID_BLK_BITS; i++)
+    {
+        long stride = AFFID_NUMOF_ID_IN_SET(i);
+        PGLS_FROM_AFF_MAP(page_list_low[i], aff_pgls_iter_lo);
+        PGLS_FROM_AFF_MAP(page_list_high[i], aff_pgls_iter_hi);
+        aff_pgls_iter_lo += stride;
+        aff_pgls_iter_hi += stride;
+    }
+
+    for (; i < RT_PAGE_MAX_ORDER; i++)
     {
-        page_list_low[i] = 0;
-        page_list_high[i] = 0;
+        page_list_low[i].page_list = 0;
+        page_list_high[i].page_list = 0;
     }
 
     /* map MPR area */
@@ -809,7 +1058,10 @@ void rt_page_init(rt_region_t reg)
     rt_size_t init_mpr_size = init_mpr_align_end - init_mpr_align_start;
     rt_size_t init_mpr_npage = init_mpr_size >> ARCH_PAGE_SHIFT;
 
-    init_mpr_cont_start = (void *)reg.start;
+    /* find an available aligned page */
+    init_mpr_cont_start = _aligned_to_affinity(reg.start,
+                                               (void *)init_mpr_align_start);
+
     rt_size_t init_mpr_cont_end = (rt_size_t)init_mpr_cont_start + init_mpr_size;
     early_offset = (rt_size_t)init_mpr_cont_start - init_mpr_align_start;
     rt_page_t mpr_cont = (void *)((char *)rt_mpr_start + early_offset);
@@ -874,13 +1126,21 @@ int rt_page_install(rt_region_t region)
         head = addr_to_page(page_start, (void *)shadow.start);
         tail = addr_to_page(page_start, (void *)shadow.end);
 
-        page_nr += ((region.end - region.start) >> ARCH_PAGE_SHIFT);
-
         err = _load_mpr_area(head, tail);
 
         if (err == RT_EOK)
         {
-            _install_page(rt_mpr_start, region, _page_insert);
+            struct installed_page_reg *installed_pgreg =
+                rt_malloc(sizeof(*installed_pgreg));
+
+            if (installed_pgreg)
+            {
+                installed_pgreg->region_area.start = region.start;
+                installed_pgreg->region_area.end = region.end;
+
+                _update_region_list(installed_pgreg);
+                _install_page(rt_mpr_start, region, _page_insert);
+            }
         }
     }
     return err;

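The heart of the rework above is the split loop in __pages_alloc(): after falling back to a larger order, it keeps the single page whose index satisfies the requested affid and reinserts the unused halves. A standalone sketch of that arithmetic, stripped of kernel types (AFFID_BLK_BITS assumed to be 2, i.e. 4 pages per block):

    #include <stdio.h>

    #define BLK_BITS 2
    #define BITS_MASK(order) (((1 << BLK_BITS) - 1) - ((1 << (order)) - 1))

    int main(void)
    {
        int affid = 3;  /* want color 3 */
        int level = 2;  /* found a free order-2 run (4 pages) */
        int head  = 0;  /* index of its first page */

        /* same as: p = head + (affid - (affid & AFFID_BITS_MASK(level))) */
        int p = head + (affid - (affid & BITS_MASK(level)));
        printf("picked page %d\n", p);                      /* -> 3 */

        /* free the half not containing p, halving down to order 0 */
        while (level > 0)
        {
            int lower  = level - 1;
            int middle = head + (1 << lower);   /* buddy boundary */
            if (p >= middle)
            {
                printf("free [%d..%d) at order %d\n", head, middle, lower);
                head = middle;
            }
            else
            {
                printf("free [%d..%d) at order %d\n",
                       middle, middle + (1 << lower), lower);
            }
            level = lower;
        }
        return 0;       /* leaves exactly page 3 allocated */
    }
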
+ 9 - 1
components/mm/mm_page.h

@@ -33,6 +33,9 @@
 
 #define PAGE_ANY_AVAILABLE 0x1ul
 
+#define RT_PAGE_PICK_AFFID(ptr) \
+    ((((long)ptr) & (RT_PAGE_AFFINITY_BLOCK_SIZE - 1)) / ARCH_PAGE_SIZE)
+
 #ifdef RT_DEBUGING_PAGE_LEAK
 #define DEBUG_FIELD struct {    \
     /* trace list */            \
@@ -57,6 +60,7 @@ DEF_PAGE_T(
 
 #undef GET_FLOOR
 #undef DEF_PAGE_T
+#undef DEBUG_FIELD
 
 typedef struct tag_region
 {
@@ -77,13 +81,17 @@ void *rt_pages_alloc(rt_uint32_t size_bits);
 
 void *rt_pages_alloc_ext(rt_uint32_t size_bits, size_t flags);
 
+void *rt_pages_alloc_tagged(rt_uint32_t size_bits, long tag, size_t flags);
+
+rt_bool_t rt_page_is_member(rt_base_t page_pa);
+
 void rt_page_ref_inc(void *addr, rt_uint32_t size_bits);
 
 int rt_page_ref_get(void *addr, rt_uint32_t size_bits);
 
 int rt_pages_free(void *addr, rt_uint32_t size_bits);
 
-void rt_page_list(void);
+int rt_page_list(void);
 
 rt_size_t rt_page_bits(rt_size_t size);
 

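Why the aff_pglist_low/high arrays declared in mm_page.c above hold AFFID_NUMOF_ID_IN_SET(0) * 2 - 2 heads: every order below AFFID_BLK_BITS needs one list head per color set, and the counts form a geometric series summing to 2N - 2. A small check, again assuming 4 pages per block:

    /* N = AFFID_NUMOF_ID_IN_SET(0) = 4
     *   order 0: 4 heads  (colors {0} {1} {2} {3})
     *   order 1: 2 heads  (color sets {0,1} {2,3})
     *   total  : 4 + 2 = 6 = N * 2 - 2
     * Orders >= AFFID_BLK_BITS span a whole block, so they use the plain
     * page_list member of the pgls_agr_t union instead. */
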
+ 11 - 0
libcpu/aarch64/common/mmu.c

@@ -296,6 +296,17 @@ void *rt_hw_mmu_map(rt_aspace_t aspace, void *v_addr, void *p_addr, size_t size,
             mapper = _kernel_map_2M;
         }
 
+        /* check aliasing */
+        #ifdef RT_DEBUGGING_ALIASING
+        #define _ALIAS_OFFSET(addr) ((long)(addr) & (RT_PAGE_AFFINITY_BLOCK_SIZE - 1))
+        if (rt_page_is_member((rt_base_t)p_addr) && _ALIAS_OFFSET(v_addr) != _ALIAS_OFFSET(p_addr))
+        {
+            LOG_W("Possibly aliasing on va(0x%lx) to pa(0x%lx)", v_addr, p_addr);
+            rt_backtrace();
+            RT_ASSERT(0);
+        }
+        #endif /* RT_DEBUGGING_ALIASING */
+
         MM_PGTBL_LOCK(aspace);
         ret = mapper(aspace->page_table, v_addr, p_addr, attr);
         MM_PGTBL_UNLOCK(aspace);
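
When the new RT_DEBUGGING_ALIASING check fires, traced with made-up addresses (RT_PAGE_AFFINITY_BLOCK_SIZE = 0x4000 assumed):

    /* rt_hw_mmu_map(aspace, v_addr = 0xffff000000101000,
     *                       p_addr = 0x40002000, ...):
     *   _ALIAS_OFFSET(v_addr) = 0x101000   & 0x3fff = 0x1000
     *   _ALIAS_OFFSET(p_addr) = 0x40002000 & 0x3fff = 0x2000
     * The offsets differ while p_addr lies in an installed page region,
     * so this mapping would give the frame two cache colors: the
     * debugger logs it, dumps a backtrace, and asserts. */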