
aarch64 libcpu update, add SMP support

shaojinchun 4 years ago
parent
commit
f1989fa56b

+ 166 - 11
libcpu/aarch64/common/context_gcc.S

@@ -12,6 +12,24 @@
 
 #include "asm-fpu.h"
 
+.text
+.weak rt_hw_cpu_id_set
+.type rt_hw_cpu_id_set, @function
+rt_hw_cpu_id_set:
+    mrs x0, mpidr_el1           /* MPIDR_EL1: Multi-Processor Affinity Register */
+    and x0, x0, #15
+    msr tpidr_el1, x0
+    ret
+
+/*
+int rt_hw_cpu_id(void)
+*/
+.global rt_hw_cpu_id
+.type rt_hw_cpu_id, @function
+rt_hw_cpu_id:
+    mrs x0, tpidr_el1           /* TPIDR_EL1 holds the cpu id cached by rt_hw_cpu_id_set */
+    ret
+
   /*
  *enable gtimer
  */
@@ -107,12 +125,52 @@ rt_hw_get_gtimer_frq:
     MRS     X29, SP_EL0
     STP     X29, X30, [SP, #-0x10]!
 
-    MOV     X3, #((3 << 6) | 0x4 | 0x1)  /* el1h, disable interrupt */
-    MOV     X2, X30
+    MOV     X19, #((3 << 6) | 0x4 | 0x1)  /* el1h, disable interrupt */
+    MOV     X18, X30
 
-    STP     X2, X3, [SP, #-0x10]!
+    STP     X18, X19, [SP, #-0x10]!
 .endm
 
+#ifdef RT_USING_SMP
+.macro RESTORE_CONTEXT
+    /* Set the SP to point to the stack of the task being restored. */
+    MOV     SP, X0
+
+    BL      lwp_check_exit
+
+    LDP     X2, X3, [SP], #0x10  /* SPSR and ELR. */
+
+    TST     X3, #0x1f
+    MSR     SPSR_EL1, X3
+    MSR     ELR_EL1, X2
+
+    LDP     X29, X30, [SP], #0x10
+    MSR     SP_EL0, X29
+    LDP     X28, X29, [SP], #0x10
+    MSR     FPCR, X28
+    MSR     FPSR, X29
+    LDP     X28, X29, [SP], #0x10
+    LDP     X26, X27, [SP], #0x10
+    LDP     X24, X25, [SP], #0x10
+    LDP     X22, X23, [SP], #0x10
+    LDP     X20, X21, [SP], #0x10
+    LDP     X18, X19, [SP], #0x10
+    LDP     X16, X17, [SP], #0x10
+    LDP     X14, X15, [SP], #0x10
+    LDP     X12, X13, [SP], #0x10
+    LDP     X10, X11, [SP], #0x10
+    LDP     X8, X9, [SP], #0x10
+    LDP     X6, X7, [SP], #0x10
+    LDP     X4, X5, [SP], #0x10
+    LDP     X2, X3, [SP], #0x10
+    LDP     X0, X1, [SP], #0x10
+    RESTORE_FPU SP
+
+    BEQ     ret_to_user
+
+    ERET
+.endm
+#else
 .macro RESTORE_CONTEXT
     /* Set the SP to point to the stack of the task being restored. */
     MOV     SP, X0
@@ -157,6 +215,7 @@ rt_hw_get_gtimer_frq:
 
     ERET
 .endm
+#endif
 
 .macro RESTORE_CONTEXT_WITHOUT_MMU_SWITCH
     /* the SP is already ok */
@@ -195,6 +254,11 @@ rt_hw_get_gtimer_frq:
     ERET
 .endm
 
+#ifdef RT_USING_SMP
+#define rt_hw_interrupt_disable rt_hw_local_irq_disable
+#define rt_hw_interrupt_enable rt_hw_local_irq_enable
+#endif
+
 .text
 /*
  * rt_base_t rt_hw_interrupt_disable();
@@ -219,6 +283,98 @@ rt_hw_interrupt_enable:
     MSR     DAIF, X0
     RET
 
+.text
+
+#ifdef RT_USING_SMP
+
+/*
+ * void rt_hw_context_switch_to(rt_ubase_t to, struct rt_thread *to_thread);
+ * X0 --> to (thread stack)
+ * X1 --> to_thread
+ */
+
+.globl rt_hw_context_switch_to
+rt_hw_context_switch_to:
+    LDR     X0, [X0]
+    MOV     SP, X0
+    MOV     X0, X1
+    BL      rt_cpus_lock_status_restore
+    BL      rt_thread_self
+    BL      lwp_user_setting_restore
+    B       rt_hw_context_switch_exit
+
+/*
+ * void rt_hw_context_switch(rt_uint32 from, rt_uint32 to, struct rt_thread *to_thread);
+ * X0 --> from (from_thread stack)
+ * X1 --> to (to_thread stack)
+ * X2 --> to_thread
+ */
+.globl rt_hw_context_switch
+rt_hw_context_switch:
+    SAVE_CONTEXT_FROM_EL1
+    MOV    X3, SP
+    STR    X3, [X0]            // store sp in preempted tasks TCB
+    LDR    X0, [X1]            // get new task stack pointer
+    MOV    SP, X0
+    MOV    X0, X2
+    BL     rt_cpus_lock_status_restore
+    BL     rt_thread_self
+    BL     lwp_user_setting_restore
+    B      rt_hw_context_switch_exit
+
+/*
+ * void rt_hw_context_switch_interrupt(context, from sp, to sp, to tcb)
+ * X0 :interrupt context
+ * X1 :addr of from_thread's sp
+ * X2 :addr of to_thread's sp
+ * X3 :to_thread's tcb
+ */
+.globl rt_hw_context_switch_interrupt
+rt_hw_context_switch_interrupt:
+    STP     X0, X1, [SP, #-0x10]!
+    STP     X2, X3, [SP, #-0x10]!
+    STP     X29, X30, [SP, #-0x10]!
+    BL      rt_thread_self
+    BL      lwp_user_setting_save
+    LDP     X29, X30, [SP], #0x10
+    LDP     X2, X3, [SP], #0x10
+    LDP     X0, X1, [SP], #0x10
+    STR     X0, [X1]
+    LDR     X0, [X2]
+    MOV     SP, X0
+    MOV     X0, X3
+    MOV     X19, X0
+    BL      rt_cpus_lock_status_restore
+    MOV     X0, X19
+    BL      lwp_user_setting_restore
+    B       rt_hw_context_switch_exit
+
+.globl vector_fiq
+vector_fiq:
+    B       .
+
+.globl vector_irq
+vector_irq:
+    CLREX
+    SAVE_CONTEXT
+    STP     X0, X1, [SP, #-0x10]!   /* X0 is thread sp */
+
+    BL      rt_interrupt_enter
+    BL      rt_hw_trap_irq
+    BL      rt_interrupt_leave
+
+    LDP     X0, X1, [SP], #0x10
+    BL      rt_scheduler_do_irq_switch
+    B       rt_hw_context_switch_exit
+
+.global rt_hw_context_switch_exit
+rt_hw_context_switch_exit:
+    MOV     X0, SP
+    RESTORE_CONTEXT
+
+#else
+
 /*
  * void rt_hw_context_switch_to(rt_ubase_t to);
  * X0 --> to sp
@@ -228,7 +384,6 @@ rt_hw_context_switch_to:
     LDR     X0, [X0]
     RESTORE_CONTEXT
 
-.text
 /*
  * void rt_hw_context_switch(rt_ubase_t from, rt_ubase_t to);
  * X0 --> from sp
@@ -254,11 +409,11 @@ rt_hw_context_switch:
 .globl rt_interrupt_to_thread
 .globl rt_hw_context_switch_interrupt
 rt_hw_context_switch_interrupt:
-    LDR     X6, =rt_thread_switch_interrupt_flag
+    ADR     X6, rt_thread_switch_interrupt_flag
     LDR     X7, [X6]
     CMP     X7, #1
     B.EQ     _reswitch
-    LDR     X4, =rt_interrupt_from_thread   // set rt_interrupt_from_thread
+    ADR     X4, rt_interrupt_from_thread   // set rt_interrupt_from_thread
     STR     X0, [X4]
     MOV     X7, #1              // set rt_thread_switch_interrupt_flag to 1
     STR     X7, [X6]
@@ -267,7 +422,7 @@ rt_hw_context_switch_interrupt:
     BL      lwp_user_setting_save
     LDP     X1, X30, [SP], #0x10
 _reswitch:
-    LDR     X6, =rt_interrupt_to_thread     // set rt_interrupt_to_thread
+    ADR     X6, rt_interrupt_to_thread     // set rt_interrupt_to_thread
     STR     X1, [X6]
     RET
 
@@ -307,7 +462,7 @@ vector_irq:
 
     // if rt_thread_switch_interrupt_flag set, jump to
     // rt_hw_context_switch_interrupt_do and don't return
-    LDR     X1, =rt_thread_switch_interrupt_flag
+    ADR     X1, rt_thread_switch_interrupt_flag
     LDR     X2, [X1]
     CMP     X2, #1
     B.NE    vector_irq_exit
@@ -315,11 +470,11 @@ vector_irq:
     MOV     X2,  #0         // clear flag
     STR     X2,  [X1]
 
-    LDR     X3,  =rt_interrupt_from_thread
+    ADR     X3,  rt_interrupt_from_thread
     LDR     X4,  [X3]
     STR     x0,  [X4]       // store sp in preempted tasks's TCB
 
-    LDR     x3,  =rt_interrupt_to_thread
+    ADR     x3,  rt_interrupt_to_thread
     LDR     X4,  [X3]
     LDR     x0,  [X4]       // get new task's stack pointer
 
@@ -328,6 +483,7 @@ vector_irq:
 vector_irq_exit:
     MOV     SP, X0
     RESTORE_CONTEXT_WITHOUT_MMU_SWITCH
+#endif
 
 // -------------------------------------------------
 
@@ -371,4 +527,3 @@ switch_mmu:
 mmu_table_get:
     MRS X0, TTBR0_EL1
     RET
-
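Note: the two new helpers at the top of context_gcc.S cache the core number so later lookups avoid decoding MPIDR_EL1 each time. A minimal C sketch of the same idea, assuming an AArch64 toolchain; the commit implements this in assembly so it can run before any C environment exists:

static inline void cpu_id_set(void)          /* sketch of rt_hw_cpu_id_set */
{
    unsigned long mpidr;

    /* affinity level 0 of MPIDR_EL1 is the core number on this SoC */
    __asm__ volatile ("mrs %0, mpidr_el1" : "=r"(mpidr));
    mpidr &= 0xf;
    /* stash it in TPIDR_EL1 so a later read is a single MRS */
    __asm__ volatile ("msr tpidr_el1, %0" :: "r"(mpidr));
}

static inline int cpu_id_get(void)           /* sketch of rt_hw_cpu_id */
{
    unsigned long id;

    __asm__ volatile ("mrs %0, tpidr_el1" : "=r"(id));
    return (int)id;
}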

+ 0 - 101
libcpu/aarch64/common/cp15.h

@@ -47,107 +47,6 @@ __STATIC_FORCEINLINE  void __DMB(void)
     __asm__ volatile ("dmb 0xF":::"memory");
 }
 
-#ifdef RT_USING_SMP
-static inline void send_ipi_msg(int cpu, int ipi_vector)
-{
-    IPI_MAILBOX_SET(cpu) = 1 << ipi_vector;
-}
-
-static inline void setup_bootstrap_addr(int cpu, int addr)
-{
-	CORE_MAILBOX3_SET(cpu) = addr;
-}
-
-static inline void enable_cpu_ipi_intr(int cpu)
-{
-    COREMB_INTCTL(cpu) = IPI_MAILBOX_INT_MASK;
-}
-
-static inline void enable_cpu_timer_intr(int cpu)
-{
-	CORETIMER_INTCTL(cpu) = 0x8;
-}
-
-static inline void enable_cntv(void)
-{
-    rt_uint32_t cntv_ctl;
-    cntv_ctl = 1;
-    asm volatile ("mcr p15, 0, %0, c14, c3, 1" :: "r"(cntv_ctl) ); // write CNTV_CTL
-}
-
-static inline void disable_cntv(void)
-{
-    rt_uint32_t cntv_ctl;
-    cntv_ctl = 0;
-    asm volatile ("mcr p15, 0, %0, c14, c3, 1" :: "r"(cntv_ctl) ); // write CNTV_CTL
-}
-
-static inline  void mask_cntv(void)
-{
-    rt_uint32_t cntv_ctl;
-    cntv_ctl = 2;
-    asm volatile ("mcr p15, 0, %0, c14, c3, 1" :: "r"(cntv_ctl) ); // write CNTV_CTL
-}
-
-static inline void unmask_cntv(void)
-{
-    rt_uint32_t cntv_ctl;
-    cntv_ctl = 1;
-    asm volatile ("mcr p15, 0, %0, c14, c3, 1" :: "r"(cntv_ctl) ); // write CNTV_CTL
-}
-
-static inline rt_uint64_t read_cntvct(void)
-{
-    rt_uint32_t val,val1;
-    asm volatile("mrrc p15, 1, %0, %1, c14" : "=r" (val),"=r" (val1));
-    return (val);
-}
-
-static inline rt_uint64_t read_cntvoff(void)
-{
-
-    rt_uint64_t val;
-    asm volatile("mrrc p15, 4, %Q0, %R0, c14" : "=r" (val));
-    return (val);
-}
-
-static inline rt_uint32_t read_cntv_tval(void)
-{
-    rt_uint32_t val;
-    asm volatile ("mrc p15, 0, %0, c14, c3, 0" : "=r"(val) );
-    return val;
-}
-
-
-static inline  void write_cntv_tval(rt_uint32_t val)
-{
-    asm volatile ("mcr p15, 0, %0, c14, c3, 0" :: "r"(val) );
-    return;
-}
-
-static inline rt_uint32_t read_cntfrq(void)
-{
-    rt_uint32_t val;
-    asm volatile ("mrc p15, 0, %0, c14, c0, 0" : "=r"(val) );
-    return val;
-}
-
-
-static inline  rt_uint32_t read_cntctrl(void)
-{
-    rt_uint32_t val;
-    asm volatile ("mrc p15, 0, %0, c14, c1, 0" : "=r"(val) );
-    return val;
-}
-
-static inline uint32_t write_cntctrl(uint32_t val)
-{
-
-    asm volatile ("mcr p15, 0, %0, c14, c1, 0" : :"r"(val) );
-    return val;
-}
-#endif
-
 unsigned long rt_cpu_get_smp_id(void);
 
 void rt_cpu_mmu_disable(void);

+ 34 - 40
libcpu/aarch64/common/cpu.c

@@ -14,59 +14,53 @@
 #include <board.h>
 #include "cp15.h"
 
-int rt_hw_cpu_id(void)
-{
-    int cpu_id;
-    rt_base_t value;
-
-    __asm__ volatile (
-            "mrs %0, mpidr_el1"
-            :"=r"(value)
-            );
-    cpu_id = value & 0xf;
-    return cpu_id;
-};
-
 #ifdef RT_USING_SMP
 void rt_hw_spin_lock_init(rt_hw_spinlock_t *lock)
 {
     lock->slock = 0;
 }
 
+#define TICKET_SHIFT    16
 void rt_hw_spin_lock(rt_hw_spinlock_t *lock)
 {
-    unsigned long tmp;
-    unsigned long newval;
-    rt_hw_spinlock_t lockval;
-    __asm__ __volatile__(
-            "pld [%0]"
-            ::"r"(&lock->slock)
-            );
-
-    __asm__ __volatile__(
-            "1: ldrex   %0, [%3]\n"
-            "   add %1, %0, %4\n"
-            "   strex   %2, %1, [%3]\n"
-            "   teq %2, #0\n"
-            "   bne 1b"
-            : "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
-            : "r" (&lock->slock), "I" (1 << 16)
-            : "cc");
-
-    while (lockval.tickets.next != lockval.tickets.owner) {
-        __WFE();
-        lockval.tickets.owner = *(volatile unsigned short *)(&lock->tickets.owner);
-    }
+    unsigned int tmp;
+    struct __arch_tickets lockval, newval;
 
-    __DMB();
+    asm volatile(
+        /* Atomically increment the next ticket. */
+        "   prfm    pstl1strm, %3\n"
+        "1: ldaxr   %w0, %3\n"
+        "   add %w1, %w0, %w5\n"
+        "   stxr    %w2, %w1, %3\n"
+        "   cbnz    %w2, 1b\n"
+        /* Did we get the lock? */
+        "   eor %w1, %w0, %w0, ror #16\n"
+        "   cbz %w1, 3f\n"
+        /*
+         * No: spin on the owner. Send a local event to avoid missing an
+         * unlock before the exclusive load.
+         */
+        "   sevl\n"
+        "2: wfe\n"
+        "   ldaxrh  %w2, %4\n"
+        "   eor %w1, %w2, %w0, lsr #16\n"
+        "   cbnz    %w1, 2b\n"
+        /* We got the lock. Critical section starts here. */
+        "3:"
+        : "=&r"(lockval), "=&r"(newval), "=&r"(tmp), "+Q"(*lock)
+        : "Q"(lock->tickets.owner), "I"(1 << TICKET_SHIFT)
+        : "memory");
+    rt_hw_dmb();
 }
 
 void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
 {
-    __DMB();
-    lock->tickets.owner++;
-    __DSB();
-    __SEV();
+    rt_hw_dmb();
+    asm volatile(
+        "   stlrh   %w1, %0\n"
+        : "=Q"(lock->tickets.owner)
+        : "r"(lock->tickets.owner + 1)
+        : "memory");
 }
 #endif /*RT_USING_SMP*/
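Note: the rewritten rt_hw_spin_lock/rt_hw_spin_unlock pair is a ticket lock. A minimal C sketch of the protocol the LDAXR/STXR/STLRH sequence implements, using GCC atomic builtins; the union mirrors rt_hw_spinlock_t, but this is illustrative only, not what the port compiles:

typedef union
{
    unsigned int slock;              /* low halfword: owner, high halfword: next */
    struct
    {
        unsigned short owner;        /* ticket currently being served */
        unsigned short next;         /* next ticket to hand out       */
    } tickets;
} ticket_lock_t;

static void ticket_lock(ticket_lock_t *lock)
{
    /* take a ticket: add 1 << 16 to slock, i.e. increment 'next'
       (same idea as the "I"(1 << TICKET_SHIFT) operand above) */
    unsigned int old = __atomic_fetch_add(&lock->slock, 1u << 16, __ATOMIC_ACQUIRE);
    unsigned short my_ticket = (unsigned short)(old >> 16);

    /* wait until 'owner' reaches our ticket; the assembly additionally
       uses SEVL/WFE with LDAXRH so a waiting core can sleep */
    while (__atomic_load_n(&lock->tickets.owner, __ATOMIC_ACQUIRE) != my_ticket)
    {
        /* busy wait */
    }
}

static void ticket_unlock(ticket_lock_t *lock)
{
    /* hand the lock to the next ticket; STLRH gives the release
       ordering in the assembly version */
    __atomic_store_n(&lock->tickets.owner,
                     (unsigned short)(lock->tickets.owner + 1), __ATOMIC_RELEASE);
}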
 

+ 10 - 0
libcpu/aarch64/common/cpuport.h

@@ -12,6 +12,16 @@
 
 #include <armv8.h>
 
+#ifdef RT_USING_SMP
+typedef union {
+    unsigned long slock;
+    struct __arch_tickets {
+        unsigned short owner;
+        unsigned short next;
+    } tickets;
+} rt_hw_spinlock_t;
+#endif
+
 rt_inline void rt_hw_isb(void)
 {
     asm volatile ("isb":::"memory");
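Note: the new rt_hw_spinlock_t union overlays a 16-bit owner/next ticket pair on a single word; on the little-endian AArch64 targets this port runs on, owner occupies bits 0-15 of slock and next bits 16-31. A small hypothetical layout check (local mirror of the union, not code from the port):

#include <assert.h>
#include <string.h>

typedef union
{
    unsigned long slock;
    struct
    {
        unsigned short owner;
        unsigned short next;
    } tickets;
} spinlock_mirror_t;

int main(void)
{
    spinlock_mirror_t lock;

    memset(&lock, 0, sizeof lock);
    lock.tickets.owner = 0x1111;
    lock.tickets.next  = 0x2222;
    /* little-endian: next lands in the upper halfword of the low 32 bits */
    assert((lock.slock & 0xffffffffUL) == 0x22221111UL);
    return 0;
}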

+ 12 - 7
libcpu/aarch64/common/exception.c

@@ -3,12 +3,16 @@
 static void data_abort(unsigned long far, unsigned long iss)
 {
     rt_kprintf("fault addr = 0x%016lx\n", far);
-    if (iss & 0x40) {
+    if (iss & 0x40)
+    {
         rt_kprintf("abort caused by write instruction\n");
-    } else {
+    }
+    else
+    {
         rt_kprintf("abort caused by read instruction\n");
     }
-    switch (iss & 0x3f) {
+    switch (iss & 0x3f)
+    {
     case 0b000000:
         rt_kprintf("Address size fault, zeroth level of translation or translation table base register\n");
         break;
@@ -142,12 +146,13 @@ void process_exception(unsigned long esr, unsigned long epc)
     unsigned long fault_addr;
     rt_kprintf("\nexception info:\n");
     ec = (unsigned char)((esr >> 26) & 0x3fU);
-    iss = (unsigned int )(esr & 0x00ffffffU);
+    iss = (unsigned int)(esr & 0x00ffffffU);
     rt_kprintf("esr.EC :0x%02x\n", ec);
     rt_kprintf("esr.IL :0x%02x\n", (unsigned char)((esr >> 25) & 0x01U));
     rt_kprintf("esr.ISS:0x%08x\n", iss);
     rt_kprintf("epc    :0x%016p\n", (void *)epc);
-    switch (ec) {
+    switch (ec)
+    {
     case 0x00:
         rt_kprintf("Exceptions with an unknow reason\n");
         break;
@@ -210,13 +215,13 @@ void process_exception(unsigned long esr, unsigned long epc)
 
     case 0x24:
         rt_kprintf("Data abort from a lower Exception level\n");
-        __asm__ volatile ("mrs %0, far_el1":"=r"(fault_addr));
+        __asm__ volatile("mrs %0, far_el1":"=r"(fault_addr));
         data_abort(fault_addr, iss);
         break;
 
     case 0x25:
         rt_kprintf("Data abort\n");
-        __asm__ volatile ("mrs %0, far_el1":"=r"(fault_addr));
+        __asm__ volatile("mrs %0, far_el1":"=r"(fault_addr));
         data_abort(fault_addr, iss);
         break;
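Note: process_exception splits ESR_EL1 into EC (bits 31:26), IL (bit 25) and the low ISS bits before dispatching. The same split, pulled into a small helper purely for illustration (names are hypothetical, not part of the port):

struct esr_fields
{
    unsigned char ec;   /* exception class        */
    unsigned char il;   /* instruction length bit */
    unsigned int  iss;  /* syndrome bits          */
};

static struct esr_fields esr_decode(unsigned long esr)
{
    struct esr_fields f;

    f.ec  = (unsigned char)((esr >> 26) & 0x3fU);
    f.il  = (unsigned char)((esr >> 25) & 0x01U);
    f.iss = (unsigned int)(esr & 0x00ffffffU);   /* mask used by the handler */
    return f;
}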
 

+ 121 - 101
libcpu/aarch64/common/mmu.c

@@ -12,7 +12,6 @@
 #include <rthw.h>
 #include <board.h>
 
-#include "cp15.h"
 #include "mmu.h"
 
 #ifdef RT_USING_USERSPACE
@@ -35,7 +34,7 @@
 #define MMU_TBL_PAGE_4k_LEVEL  3
 #define MMU_TBL_LEVEL_NR       4
 
-void *_rt_hw_mmu_v2p(rt_mmu_info *mmu_info, void* v_addr);
+void *_rt_hw_mmu_v2p(rt_mmu_info *mmu_info, void *v_addr);
 
 struct page_table
 {
@@ -48,13 +47,12 @@ unsigned long get_free_page(void)
 {
     if (!__init_page_array)
     {
-        __init_page_array = (struct page_table *)(((unsigned long)HEAP_BEGIN + ARCH_PAGE_MASK) & ~(ARCH_PAGE_MASK));
+        unsigned long temp_page_start;
+        asm volatile("mov %0, sp":"=r"(temp_page_start));
+        __init_page_array = (struct page_table *)(temp_page_start & ~(ARCH_SECTION_MASK));
+        __page_off = 2; /* pages 0 and 1 are reserved for ttbr0, ttbr1 */
     }
     __page_off++;
-    if (__init_page_array + __page_off > (struct page_table *)HEAP_END)
-    {
-        return 0;
-    }
     return (unsigned long)(__init_page_array[__page_off - 1].page);
 }
 
@@ -66,10 +64,10 @@ void mmu_memset(char *dst, char v,  size_t len)
     }
 }
 
-static int _map_single_page_2M(unsigned long* lv0_tbl, unsigned long va, unsigned long pa, unsigned long attr)
+static int _map_single_page_2M(unsigned long *lv0_tbl, unsigned long va, unsigned long pa, unsigned long attr)
 {
     int level;
-    unsigned long* cur_lv_tbl = lv0_tbl;
+    unsigned long *cur_lv_tbl = lv0_tbl;
     unsigned long page;
     unsigned long off;
     int level_shift = MMU_ADDRESS_BITS;
@@ -99,21 +97,21 @@ static int _map_single_page_2M(unsigned long* lv0_tbl, unsigned long va, unsigne
         page = cur_lv_tbl[off];
         if ((page & MMU_TYPE_MASK) == MMU_TYPE_BLOCK)
         {
-            //is block! error!
+            /* is block! error! */
             return MMU_MAP_ERROR_CONFLICT;
         }
-        cur_lv_tbl = (unsigned long*)(page & MMU_ADDRESS_MASK);
+        cur_lv_tbl = (unsigned long *)(page & MMU_ADDRESS_MASK);
         level_shift -= MMU_LEVEL_SHIFT;
     }
     attr &= MMU_ATTRIB_MASK;
-    pa |= (attr | MMU_TYPE_BLOCK); //block
+    pa |= (attr | MMU_TYPE_BLOCK); /* block */
     off = (va >> ARCH_SECTION_SHIFT);
     off &= MMU_LEVEL_MASK;
     cur_lv_tbl[off] = pa;
     return 0;
 }
 
-int armv8_init_map_2M(unsigned long* lv0_tbl, unsigned long va, unsigned long pa, unsigned long count, unsigned long attr)
+int armv8_init_map_2M(unsigned long *lv0_tbl, unsigned long va, unsigned long pa, unsigned long count, unsigned long attr)
 {
     unsigned long i;
     int ret;
@@ -139,10 +137,10 @@ int armv8_init_map_2M(unsigned long* lv0_tbl, unsigned long va, unsigned long pa
     return 0;
 }
 
-static int _kenrel_map_2M(unsigned long* lv0_tbl, unsigned long va, unsigned long pa, unsigned long attr)
+static int _kenrel_map_2M(unsigned long *lv0_tbl, unsigned long va, unsigned long pa, unsigned long attr)
 {
     int level;
-    unsigned long* cur_lv_tbl = lv0_tbl;
+    unsigned long *cur_lv_tbl = lv0_tbl;
     unsigned long page;
     unsigned long off;
     int level_shift = MMU_ADDRESS_BITS;
@@ -182,7 +180,7 @@ static int _kenrel_map_2M(unsigned long* lv0_tbl, unsigned long va, unsigned lon
         page = cur_lv_tbl[off];
         if ((page & MMU_TYPE_MASK) == MMU_TYPE_BLOCK)
         {
-            //is block! error!
+            /* is block! error! */
             return MMU_MAP_ERROR_CONFLICT;
         }
         cur_lv_tbl = (unsigned long *)(page & MMU_ADDRESS_MASK);
@@ -190,11 +188,12 @@ static int _kenrel_map_2M(unsigned long* lv0_tbl, unsigned long va, unsigned lon
         level_shift -= MMU_LEVEL_SHIFT;
     }
     attr &= MMU_ATTRIB_MASK;
-    pa |= (attr | MMU_TYPE_BLOCK); //block
+    pa |= (attr | MMU_TYPE_BLOCK); /* block */
     off = (va >> ARCH_SECTION_SHIFT);
     off &= MMU_LEVEL_MASK;
     cur_lv_tbl[off] = pa;
     rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, cur_lv_tbl + off, sizeof(void *));
+
     return 0;
 }
 
@@ -204,11 +203,11 @@ struct mmu_level_info
     void *page;
 };
 
-static void _kenrel_unmap_4K(unsigned long* lv0_tbl, void* v_addr)
+static void _kenrel_unmap_4K(unsigned long *lv0_tbl, void *v_addr)
 {
     int level;
     unsigned long va = (unsigned long)v_addr;
-    unsigned long* cur_lv_tbl = lv0_tbl;
+    unsigned long *cur_lv_tbl = lv0_tbl;
     unsigned long page;
     unsigned long off;
     struct mmu_level_info level_info[4];
@@ -216,7 +215,7 @@ static void _kenrel_unmap_4K(unsigned long* lv0_tbl, void* v_addr)
     int level_shift = MMU_ADDRESS_BITS;
     unsigned long *pos;
 
-    rt_memset(level_info, 0 , sizeof level_info);
+    rt_memset(level_info, 0, sizeof level_info);
     for (level = 0; level < MMU_TBL_LEVEL_NR; level++)
     {
         off = (va >> level_shift);
@@ -231,7 +230,7 @@ static void _kenrel_unmap_4K(unsigned long* lv0_tbl, void* v_addr)
             break;
         }
         level_info[level].pos = cur_lv_tbl + off;
-        cur_lv_tbl = (unsigned long*)(page & MMU_ADDRESS_MASK);
+        cur_lv_tbl = (unsigned long *)(page & MMU_ADDRESS_MASK);
         cur_lv_tbl = (unsigned long *)((unsigned long)cur_lv_tbl - PV_OFFSET);
         level_info[level].page = cur_lv_tbl;
         level_shift -= MMU_LEVEL_SHIFT;
@@ -241,7 +240,7 @@ static void _kenrel_unmap_4K(unsigned long* lv0_tbl, void* v_addr)
     pos = level_info[level].pos;
     if (pos)
     {
-        *pos = RT_NULL;
+        *pos = (unsigned long)RT_NULL;
         rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, pos, sizeof(void *));
     }
     level--;
@@ -255,7 +254,7 @@ static void _kenrel_unmap_4K(unsigned long* lv0_tbl, void* v_addr)
             ref = rt_page_ref_get(cur_page, 0);
             if (ref == 1)
             {
-                *pos = RT_NULL;
+                *pos = (unsigned long)RT_NULL;
                 rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, pos, sizeof(void *));
             }
             rt_pages_free(cur_page, 0);
@@ -266,11 +265,11 @@ static void _kenrel_unmap_4K(unsigned long* lv0_tbl, void* v_addr)
     return;
 }
 
-static int _kenrel_map_4K(unsigned long* lv0_tbl, unsigned long va, unsigned long pa, unsigned long attr)
+static int _kenrel_map_4K(unsigned long *lv0_tbl, unsigned long va, unsigned long pa, unsigned long attr)
 {
     int ret = 0;
     int level;
-    unsigned long* cur_lv_tbl = lv0_tbl;
+    unsigned long *cur_lv_tbl = lv0_tbl;
     unsigned long page;
     unsigned long off;
     int level_shift = MMU_ADDRESS_BITS;
@@ -311,7 +310,7 @@ static int _kenrel_map_4K(unsigned long* lv0_tbl, unsigned long va, unsigned lon
         page = cur_lv_tbl[off];
         if ((page & MMU_TYPE_MASK) == MMU_TYPE_BLOCK)
         {
-            //is block! error!
+            /* is block! error! */
             ret = MMU_MAP_ERROR_CONFLICT;
             goto err;
         }
@@ -319,12 +318,12 @@ static int _kenrel_map_4K(unsigned long* lv0_tbl, unsigned long va, unsigned lon
         cur_lv_tbl = (unsigned long *)((unsigned long)cur_lv_tbl - PV_OFFSET);
         level_shift -= MMU_LEVEL_SHIFT;
     }
-    //now is level page
+    /* now is level page */
     attr &= MMU_ATTRIB_MASK;
-    pa |= (attr | MMU_TYPE_PAGE); //page
+    pa |= (attr | MMU_TYPE_PAGE); /* page */
     off = (va >> ARCH_PAGE_SHIFT);
     off &= MMU_LEVEL_MASK;
-    cur_lv_tbl[off] = pa; //page
+    cur_lv_tbl[off] = pa; /* page */
     rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, cur_lv_tbl + off, sizeof(void *));
     return ret;
 err:
@@ -332,7 +331,7 @@ err:
     return ret;
 }
 
-int kernel_map_fixed(unsigned long* lv0_tbl, unsigned long va, unsigned long pa, unsigned long count, unsigned long attr)
+int kernel_map_fixed(unsigned long *lv0_tbl, unsigned long va, unsigned long pa, unsigned long count, unsigned long attr)
 {
     unsigned long i;
     int ret;
@@ -365,33 +364,33 @@ int kernel_map_fixed(unsigned long* lv0_tbl, unsigned long va, unsigned long pa,
   index 1 : memory nocache
   index 2 : device nGnRnE
  *****************************************************/
-void mmu_tcr_init(void *tbl0, void *tbl1)
+void mmu_tcr_init(void)
 {
     unsigned long val64;
 
     val64 = 0x00447fUL;
     __asm__ volatile("msr MAIR_EL1, %0\n dsb sy\n"::"r"(val64));
 
-    //TCR_EL1
-    val64 = (16UL << 0)  //t0sz 48bit
-        | (0x0UL << 6)   //reserved
-        | (0x0UL << 7)   //epd0
-        | (0x3UL << 8)   //t0 wb cacheable
-        | (0x3UL << 10)  //inner shareable
-        | (0x2UL << 12)  //t0 outer shareable
-        | (0x0UL << 14)  //t0 4K
-        | (16UL << 16)   //t1sz 48bit
-        | (0x0UL << 22)  //define asid use ttbr0.asid
-        | (0x0UL << 23)  //epd1
-        | (0x3UL << 24)  //t1 inner wb cacheable
-        | (0x3UL << 26)  //t1 outer wb cacheable
-        | (0x2UL << 28)  //t1 outer shareable
-        | (0x2UL << 30)  //t1 4k
-        | (0x1UL << 32)  //001b 64GB PA
-        | (0x0UL << 35)  //reserved
-        | (0x1UL << 36)  //as: 0:8bit 1:16bit
-        | (0x0UL << 37)  //tbi0
-        | (0x0UL << 38); //tbi1
+    /* TCR_EL1 */
+    val64 = (16UL << 0)  /* t0sz 48bit */
+            | (0x0UL << 6)   /* reserved */
+            | (0x0UL << 7)   /* epd0 */
+            | (0x3UL << 8)   /* t0 wb cacheable */
+            | (0x3UL << 10)  /* inner shareable */
+            | (0x2UL << 12)  /* t0 outer shareable */
+            | (0x0UL << 14)  /* t0 4K */
+            | (16UL << 16)   /* t1sz 48bit */
+            | (0x0UL << 22)  /* define asid use ttbr0.asid */
+            | (0x0UL << 23)  /* epd1 */
+            | (0x3UL << 24)  /* t1 inner wb cacheable */
+            | (0x3UL << 26)  /* t1 outer wb cacheable */
+            | (0x2UL << 28)  /* t1 outer shareable */
+            | (0x2UL << 30)  /* t1 4k */
+            | (0x1UL << 32)  /* 001b 64GB PA */
+            | (0x0UL << 35)  /* reserved */
+            | (0x1UL << 36)  /* as: 0:8bit 1:16bit */
+            | (0x0UL << 37)  /* tbi0 */
+            | (0x0UL << 38); /* tbi1 */
     __asm__ volatile("msr TCR_EL1, %0\n"::"r"(val64));
 }
 
@@ -404,11 +403,11 @@ void rt_hw_cpu_dump_page_table(rt_uint32_t *ptb)
 {
 }
 
-volatile unsigned long MMUTable[512] __attribute__((aligned(4*1024)));
+volatile unsigned long MMUTable[512] __attribute__((aligned(4 * 1024)));
 void rt_hw_mmu_setmtt(unsigned long vaddrStart,
-        unsigned long vaddrEnd,
-        unsigned long paddrStart,
-        unsigned long attr)
+                      unsigned long vaddrEnd,
+                      unsigned long paddrStart,
+                      unsigned long attr)
 {
     unsigned long count;
 
@@ -438,7 +437,7 @@ void rt_hw_mmu_setmtt(unsigned long vaddrStart,
     kernel_map_fixed((unsigned long *)MMUTable, vaddrStart, paddrStart, count, attr);
 }
 
-static void kernel_mmu_switch(unsigned long tbl)
+void kernel_mmu_switch(unsigned long tbl)
 {
     tbl += PV_OFFSET;
     __asm__ volatile("msr TTBR1_EL1, %0\n dsb sy\nisb"::"r"(tbl):"memory");
@@ -449,20 +448,28 @@ static void kernel_mmu_switch(unsigned long tbl)
 void rt_hw_mmu_setup(struct mem_desc *mdesc, int desc_nr)
 {
     /* set page table */
-    for(; desc_nr > 0; desc_nr--)
+    for (; desc_nr > 0; desc_nr--)
     {
         rt_hw_mmu_setmtt(mdesc->vaddr_start, mdesc->vaddr_end,
-                mdesc->paddr_start, mdesc->attr);
+                         mdesc->paddr_start, mdesc->attr);
         mdesc++;
     }
     rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, (void *)MMUTable, sizeof MMUTable);
     kernel_mmu_switch((unsigned long)MMUTable);
 }
 
-/*
-   mem map
-   */
-int rt_hw_mmu_map_init(rt_mmu_info *mmu_info, void* v_address, size_t size, size_t *vtable, size_t pv_off)
+/**
+ * This function will initialize rt_mmu_info structure.
+ *
+ * @param mmu_info   rt_mmu_info structure
+ * @param v_address  virtual address
+ * @param size       map size
+ * @param vtable     mmu table
+ * @param pv_off     pv offset in kernel space
+ *
+ * @return 0 on successful and -1 for fail
+ */
+int rt_hw_mmu_map_init(rt_mmu_info *mmu_info, void *v_address, size_t size, size_t *vtable, size_t pv_off)
 {
     rt_base_t level;
     size_t va_s, va_e;
@@ -475,7 +482,7 @@ int rt_hw_mmu_map_init(rt_mmu_info *mmu_info, void* v_address, size_t size, size
     va_s = (size_t)v_address;
     va_e = (size_t)v_address + size - 1;
 
-    if ( va_e < va_s)
+    if (va_e < va_s)
     {
         return -1;
     }
@@ -500,7 +507,7 @@ int rt_hw_mmu_map_init(rt_mmu_info *mmu_info, void* v_address, size_t size, size
     return 0;
 }
 
-int rt_hw_mmu_ioremap_init(rt_mmu_info *mmu_info, void* v_address, size_t size)
+int rt_hw_mmu_ioremap_init(rt_mmu_info *mmu_info, void *v_address, size_t size)
 {
     return 0;
 }
@@ -587,7 +594,7 @@ static int check_vaddr(rt_mmu_info *mmu_info, void *va, int pages)
 }
 #endif
 
-static void __rt_hw_mmu_unmap(rt_mmu_info *mmu_info, void* v_addr, size_t npages)
+static void __rt_hw_mmu_unmap(rt_mmu_info *mmu_info, void *v_addr, size_t npages)
 {
     size_t loop_va = (size_t)v_addr & ~ARCH_PAGE_MASK;
 
@@ -603,23 +610,33 @@ static void __rt_hw_mmu_unmap(rt_mmu_info *mmu_info, void* v_addr, size_t npages
     }
 }
 
-static int __rt_hw_mmu_map(rt_mmu_info *mmu_info, void* v_addr, void* p_addr, size_t npages, size_t attr)
+static int __rt_hw_mmu_map(rt_mmu_info *mmu_info, void *v_addr, void *p_addr, size_t npages, size_t attr)
 {
+    int ret = -1;
     size_t loop_va = (size_t)v_addr & ~ARCH_PAGE_MASK;
     size_t loop_pa = (size_t)p_addr & ~ARCH_PAGE_MASK;
+    size_t unmap_va = loop_va;
 
-    if (!mmu_info)
-    {
-        return -1;
-    }
-
-    while (npages--)
+    if (mmu_info)
     {
-        _kenrel_map_4K(mmu_info->vtable, loop_va, loop_pa, attr);
-        loop_va += ARCH_PAGE_SIZE;
-        loop_pa += ARCH_PAGE_SIZE;
+        while (npages--)
+        {
+            ret = _kenrel_map_4K(mmu_info->vtable, loop_va, loop_pa, attr);
+            if (ret != 0)
+            {
+                /* error, undo map */
+                while (unmap_va != loop_va)
+                {
+                    _kenrel_unmap_4K(mmu_info->vtable, (void *)unmap_va);
+                    unmap_va += ARCH_PAGE_SIZE;
+                }
+                break;
+            }
+            loop_va += ARCH_PAGE_SIZE;
+            loop_pa += ARCH_PAGE_SIZE;
+        }
     }
-    return 0;
+    return ret;
 }
 
 static void rt_hw_cpu_tlb_invalidate(void)
@@ -628,7 +645,7 @@ static void rt_hw_cpu_tlb_invalidate(void)
 }
 
 #ifdef RT_USING_USERSPACE
-void *_rt_hw_mmu_map(rt_mmu_info *mmu_info, void *v_addr, void* p_addr, size_t size, size_t attr)
+void *_rt_hw_mmu_map(rt_mmu_info *mmu_info, void *v_addr, void *p_addr, size_t size, size_t attr)
 {
     size_t pa_s, pa_e;
     size_t vaddr;
@@ -653,7 +670,7 @@ void *_rt_hw_mmu_map(rt_mmu_info *mmu_info, void *v_addr, void* p_addr, size_t s
             return 0;
         }
         vaddr &= ~ARCH_PAGE_MASK;
-        if (check_vaddr(mmu_info, (void*)vaddr, pages) != 0)
+        if (check_vaddr(mmu_info, (void *)vaddr, pages) != 0)
         {
             return 0;
         }
@@ -662,12 +679,13 @@ void *_rt_hw_mmu_map(rt_mmu_info *mmu_info, void *v_addr, void* p_addr, size_t s
     {
         vaddr = find_vaddr(mmu_info, pages);
     }
-    if (vaddr) {
-        ret = __rt_hw_mmu_map(mmu_info, (void*)vaddr, p_addr, pages, attr);
+    if (vaddr)
+    {
+        ret = __rt_hw_mmu_map(mmu_info, (void *)vaddr, p_addr, pages, attr);
         if (ret == 0)
         {
             rt_hw_cpu_tlb_invalidate();
-            return (void*)(vaddr + ((size_t)p_addr & ARCH_PAGE_MASK));
+            return (void *)(vaddr + ((size_t)p_addr & ARCH_PAGE_MASK));
         }
     }
     return 0;
@@ -699,7 +717,7 @@ void *_rt_hw_mmu_map(rt_mmu_info *mmu_info, void* p_addr, size_t size, size_t at
 #endif
 
 #ifdef RT_USING_USERSPACE
-static int __rt_hw_mmu_map_auto(rt_mmu_info *mmu_info, void* v_addr, size_t npages, size_t attr)
+static int __rt_hw_mmu_map_auto(rt_mmu_info *mmu_info, void *v_addr, size_t npages, size_t attr)
 {
     size_t loop_va = (size_t)v_addr & ~ARCH_PAGE_MASK;
     size_t loop_pa;
@@ -727,13 +745,13 @@ err:
         int i;
         void *va, *pa;
 
-        va = (void*)((size_t)v_addr & ~ARCH_PAGE_MASK);
+        va = (void *)((size_t)v_addr & ~ARCH_PAGE_MASK);
         for (i = 0; i < npages; i++)
         {
             pa = rt_hw_mmu_v2p(mmu_info, va);
-            pa = (void*)((char*)pa - mmu_info->pv_off);
+            pa = (void *)((char *)pa - mmu_info->pv_off);
             rt_pages_free(pa, 0);
-            va = (void*)((char*)va + ARCH_PAGE_SIZE);
+            va = (void *)((char *)va + ARCH_PAGE_SIZE);
         }
 
         __rt_hw_mmu_unmap(mmu_info, v_addr, npages);
@@ -759,7 +777,7 @@ void *_rt_hw_mmu_map_auto(rt_mmu_info *mmu_info, void *v_addr, size_t size, size
     {
         vaddr = (size_t)v_addr;
         vaddr &= ~ARCH_PAGE_MASK;
-        if (check_vaddr(mmu_info, (void*)vaddr, pages) != 0)
+        if (check_vaddr(mmu_info, (void *)vaddr, pages) != 0)
         {
             return 0;
         }
@@ -768,19 +786,20 @@ void *_rt_hw_mmu_map_auto(rt_mmu_info *mmu_info, void *v_addr, size_t size, size
     {
         vaddr = find_vaddr(mmu_info, pages);
     }
-    if (vaddr) {
-        ret = __rt_hw_mmu_map_auto(mmu_info, (void*)vaddr, pages, attr);
+    if (vaddr)
+    {
+        ret = __rt_hw_mmu_map_auto(mmu_info, (void *)vaddr, pages, attr);
         if (ret == 0)
         {
             rt_hw_cpu_tlb_invalidate();
-            return (void*)((char*)vaddr + offset);
+            return (void *)((char *)vaddr + offset);
         }
     }
     return 0;
 }
 #endif
 
-void _rt_hw_mmu_unmap(rt_mmu_info *mmu_info, void* v_addr, size_t size)
+void _rt_hw_mmu_unmap(rt_mmu_info *mmu_info, void *v_addr, size_t size)
 {
     size_t va_s, va_e;
     int pages;
@@ -795,7 +814,7 @@ void _rt_hw_mmu_unmap(rt_mmu_info *mmu_info, void* v_addr, size_t size)
 }
 
 #ifdef RT_USING_USERSPACE
-void *rt_hw_mmu_map(rt_mmu_info *mmu_info, void *v_addr, void* p_addr, size_t size, size_t attr)
+void *rt_hw_mmu_map(rt_mmu_info *mmu_info, void *v_addr, void *p_addr, size_t size, size_t attr)
 {
     void *ret;
     rt_base_t level;
@@ -818,7 +837,7 @@ void *rt_hw_mmu_map_auto(rt_mmu_info *mmu_info, void *v_addr, size_t size, size_
 }
 #endif
 
-void rt_hw_mmu_unmap(rt_mmu_info *mmu_info, void* v_addr, size_t size)
+void rt_hw_mmu_unmap(rt_mmu_info *mmu_info, void *v_addr, size_t size)
 {
     rt_base_t level;
 
@@ -827,12 +846,12 @@ void rt_hw_mmu_unmap(rt_mmu_info *mmu_info, void* v_addr, size_t size)
     rt_hw_interrupt_enable(level);
 }
 
-void *_rt_hw_mmu_v2p(rt_mmu_info *mmu_info, void* v_addr)
+void *_rt_hw_mmu_v2p(rt_mmu_info *mmu_info, void *v_addr)
 {
     int level;
     unsigned long va = (unsigned long)v_addr;
     unsigned long pa;
-    unsigned long* cur_lv_tbl;
+    unsigned long *cur_lv_tbl;
     unsigned long page;
     unsigned long off;
     unsigned long off_addr;
@@ -840,7 +859,7 @@ void *_rt_hw_mmu_v2p(rt_mmu_info *mmu_info, void* v_addr)
 
     if (!mmu_info)
     {
-        return (void*)0;
+        return (void *)0;
     }
     cur_lv_tbl = mmu_info->vtable;
     for (level = 0; level < MMU_TBL_PAGE_4k_LEVEL; level++)
@@ -859,11 +878,11 @@ void *_rt_hw_mmu_v2p(rt_mmu_info *mmu_info, void* v_addr)
             pa += off_addr;
             return (void *)pa;
         }
-        cur_lv_tbl = (unsigned long*)(page & MMU_ADDRESS_MASK);
+        cur_lv_tbl = (unsigned long *)(page & MMU_ADDRESS_MASK);
         cur_lv_tbl = (unsigned long *)((unsigned long)cur_lv_tbl - PV_OFFSET);
         level_shift -= MMU_LEVEL_SHIFT;
     }
-    //now is level MMU_TBL_PAGE_4k_LEVEL
+    /* now is level MMU_TBL_PAGE_4k_LEVEL */
     off = (va >> ARCH_PAGE_SHIFT);
     off &= MMU_LEVEL_MASK;
     page = cur_lv_tbl[off];
@@ -876,7 +895,7 @@ void *_rt_hw_mmu_v2p(rt_mmu_info *mmu_info, void* v_addr)
     return (void *)pa;
 }
 
-void *rt_hw_mmu_v2p(rt_mmu_info *mmu_info, void* v_addr)
+void *rt_hw_mmu_v2p(rt_mmu_info *mmu_info, void *v_addr)
 {
     void *ret;
     rt_base_t level;
@@ -895,10 +914,11 @@ void rt_hw_mmu_setup_early(unsigned long *tbl0, unsigned long *tbl1, unsigned lo
     unsigned long count = (size + ARCH_SECTION_MASK) >> ARCH_SECTION_SHIFT;
     unsigned long normal_attr = MMU_MAP_CUSTOM(MMU_AP_KAUN, NORMAL_MEM);
 
-    mmu_memset((char *)tbl0, 0, sizeof(struct page_table));
-    mmu_memset((char *)tbl1, 0, sizeof(struct page_table));
+    /* clean the first two pages */
+    mmu_memset((char *)tbl0, 0, ARCH_PAGE_SIZE);
+    mmu_memset((char *)tbl1, 0, ARCH_PAGE_SIZE);
 
-    ret = armv8_init_map_2M(tbl1 , va, va + pv_off, count, normal_attr);
+    ret = armv8_init_map_2M(tbl1, va, va + pv_off, count, normal_attr);
     if (ret != 0)
     {
         while (1);

+ 17 - 11
libcpu/aarch64/common/trap.c

@@ -58,7 +58,7 @@ int check_user_stack(unsigned long esr, struct rt_hw_exp_stack *regs)
     case 0x20:
     case 0x21:
     case 0x24:
-        asm volatile ("mrs %0, far_el1":"=r"(dfar));
+        asm volatile("mrs %0, far_el1":"=r"(dfar));
         if (arch_expand_user_stack(dfar))
         {
             ret = 1;
@@ -186,7 +186,7 @@ void rt_hw_trap_irq(void)
     }
 #else
     void *param;
-    int ir;
+    int ir, ir_self;
     rt_isr_handler_t isr_func;
     extern struct rt_irq_desc isr_table[];
 
@@ -198,17 +198,20 @@ void rt_hw_trap_irq(void)
         return;
     }
 
+    /* bits 10~12 are the cpu id, bits 0~9 the interrupt id */
+    ir_self = ir & 0x3ffUL;
+
     /* get interrupt service routine */
-    isr_func = isr_table[ir].handler;
+    isr_func = isr_table[ir_self].handler;
 #ifdef RT_USING_INTERRUPT_INFO
-    isr_table[ir].counter++;
+    isr_table[ir_self].counter++;
 #endif
     if (isr_func)
     {
         /* Interrupt for myself. */
-        param = isr_table[ir].param;
+        param = isr_table[ir_self].param;
         /* turn to interrupt service routine */
-        isr_func(ir, param);
+        isr_func(ir_self, param);
     }
 
     /* end of interrupt */
@@ -219,18 +222,21 @@ void rt_hw_trap_irq(void)
 void rt_hw_trap_fiq(void)
 {
     void *param;
-    int ir;
+    int ir, ir_self;
     rt_isr_handler_t isr_func;
     extern struct rt_irq_desc isr_table[];
 
     ir = rt_hw_interrupt_get_irq();
 
+    /* bits 10~12 are the cpu id, bits 0~9 the interrupt id */
+    ir_self = ir & 0x3ffUL;
+
     /* get interrupt service routine */
-    isr_func = isr_table[ir].handler;
-    param = isr_table[ir].param;
+    isr_func = isr_table[ir_self].handler;
+    param = isr_table[ir_self].param;
 
     /* turn to interrupt service routine */
-    isr_func(ir, param);
+    isr_func(ir_self, param);
 
     /* end of interrupt */
     rt_hw_interrupt_ack(ir);
@@ -243,7 +249,7 @@ void rt_hw_trap_exception(struct rt_hw_exp_stack *regs)
     unsigned long esr;
     unsigned char ec;
 
-    asm volatile ("mrs %0, esr_el1":"=r"(esr));
+    asm volatile("mrs %0, esr_el1":"=r"(esr));
     ec = (unsigned char)((esr >> 26) & 0x3fU);
 
     if (ec == 0x15) /* is 64bit syscall ? */
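Note: with SMP, the value returned by rt_hw_interrupt_get_irq() now carries the source cpu id in bits 10~12 alongside the interrupt id in bits 0~9 (per the comments above), so the handlers index isr_table with the masked id but still acknowledge the full value. A plain C illustration of the split:

static inline int irq_id_from_ack(int ack)
{
    return ack & 0x3ff;          /* interrupt id, used to index isr_table */
}

static inline int cpu_id_from_ack(int ack)
{
    return (ack >> 10) & 0x7;    /* requesting cpu id, bits 10~12 */
}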

+ 12 - 11
libcpu/aarch64/common/vector_gcc.S

@@ -18,27 +18,28 @@ system_vectors:
 .align 11
     .set    VBAR, system_vectors
     .org    VBAR
-    // Exception from CurrentEL (EL1) with SP_EL0 (SPSEL=1)
+
+    /*  Exception from CurrentEL (EL1) with SP_EL0 (SPSEL=1) */
     .org (VBAR + 0x00 + 0)
-    B vector_serror      // 			Synchronous
+    B vector_serror                     /* Synchronous */
     .org (VBAR + 0x80 + 0)
-    B vector_serror        //          IRQ/vIRQ
+    B vector_serror                     /* IRQ/vIRQ */
     .org (VBAR + 0x100 + 0)
-    B vector_serror        //          FIQ/vFIQ
+    B vector_serror                     /* FIQ/vFIQ */
     .org (VBAR + 0x180 + 0)
-    B vector_serror      //         Error/vError
+    B vector_serror                     /* Error/vError */
 
-    // Exception from CurrentEL (EL1) with SP_ELn
+    /*  Exception from CurrentEL (EL1) with SP_ELn */
     .org (VBAR + 0x200 + 0)
-    B vector_exception      // 			Synchronous
+    B vector_exception                  /* Synchronous */
     .org (VBAR + 0x280 + 0)
-    B vector_irq    	// 			IRQ/vIRQ
+    B vector_irq    	                /* IRQ/vIRQ */
     .org (VBAR + 0x300 + 0)
-    B vector_fiq        //          FIQ/vFIQ
+    B vector_fiq                        /* FIQ/vFIQ */
     .org (VBAR + 0x380 + 0)
     B vector_serror
 
-    // Exception from lower EL, aarch64
+    /* Exception from lower EL, aarch64 */
     .org (VBAR + 0x400 + 0)
     B vector_exception
     .org (VBAR + 0x480 + 0)
@@ -48,7 +49,7 @@ system_vectors:
     .org (VBAR + 0x580 + 0)
     B vector_serror
 
-    // Exception from lower EL, aarch32
+    /* Exception from lower EL, aarch32 */
     .org (VBAR + 0x600 + 0)
     B vector_serror
     .org (VBAR + 0x680 + 0)

+ 155 - 43
libcpu/aarch64/cortex-a/entry_point.S

@@ -10,14 +10,13 @@
 
 #include "rtconfig.h"
 .section ".text.entrypoint","ax"
-.set EL1_stack,         __el1_stack
 .global __start
 
 __start:
+    bl      rt_hw_cpu_id_set
     /* read cpu id, stop slave cores */
-    mrs     x1, mpidr_el1           /* MPIDR_EL1: Multi-Processor Affinity Register */
-    and     x1, x1, #3
-    cbz     x1, .L__cpu_0           /* .L prefix is the local label in ELF */
+    mrs     x0, tpidr_el1
+    cbz     x0, .L__cpu_0           /* .L prefix is the local label in ELF */
 
     /* cpu id > 0, stop */
     /* cpu id == 0 will also goto here after returned from entry() if possible */
@@ -27,9 +26,6 @@ __start:
 
 .L__cpu_0:
     /* set stack before our code, Define stack pointer for current exception level */
-    /* ldr	x2, =EL1_stack */
-    /* mov	sp, x2         */
-
     adr     x1, __start
 
     /* set up EL1 */
@@ -52,7 +48,7 @@ __start:
 .L__not_in_el3:                     /* running at EL2 or EL1 */
     cmp     x0, #4                  /* 0x04  0100 EL1 */
     beq     .L__in_el1              /* EL1 -> 5: */
- 
+
     mrs     x0, hcr_el2
     bic     x0, x0, #0xff
     msr     hcr_el2, x0
@@ -88,44 +84,34 @@ __start:
     msr     cpacr_el1, x1
 
     /* clear bss */
-    ldr     x0, =__bss_start
-    ldr     x1, =__bss_end
-    ldr     x2, =PV_OFFSET
-    add     x0, x0, x2
-    add     x1, x1, x2
-
-    sub     x2, x1, x0
-    mov     x3, x1
-    cmp     x2, #7
-    bls     .L__clean_bss_check
-
-.L__clean_bss_loop_quad:
-    str     xzr, [x0], #8
-    sub     x2, x3, x0
-    cmp     x2, #7
-    bhi     .L__clean_bss_loop_quad
-    cmp     x1, x0
-    bls     .L__jump_to_entry
-
-.L__clean_bss_loop_byte:
-    str     xzr, [x0], #1
-
-.L__clean_bss_check:
-    cmp     x1, x0
-    bhi     .L__clean_bss_loop_byte
+    ldr     x1, =__bss_start     /* get bss start address */
+    ldr     x2, =__bss_end
+    sub     x2, x2, x1            /* get bss size          */
+    add     x1, x1, x9
+
+    and     x3, x2, #7           /* x3 = remaining bytes, < 8 */
+    ldr     x4, =~0x7
+    and     x2, x2, x4            /* mask ~7 */
+
+.L__clean_bss_loop:
+    cbz     x2, .L__clean_bss_loop_1
+    str     xzr, [x1], #8
+    sub     x2, x2, #8
+    b       .L__clean_bss_loop
+
+.L__clean_bss_loop_1:
+    cbz     x3, .L__jump_to_entry
+    strb    wzr, [x1], #1
+    sub     x3, x3, #1
+    b       .L__clean_bss_loop_1
 
 .L__jump_to_entry:          /* jump to C code, should not return */        
-
-    bl get_free_page
-    mov x21, x0
-    bl get_free_page
-    mov x20, x0
-
-    mov x1, x21
     bl mmu_tcr_init
 
-    mov x0, x20
-    mov x1, x21
+    adr x1, __start
+    ldr x0, =~0x1fffff
+    and x0, x1, x0
+    add x1, x0, #0x1000
 
     msr ttbr0_el1, x0
     msr ttbr1_el1, x1
@@ -135,7 +121,7 @@ __start:
     ldr x3, =PV_OFFSET
     bl rt_hw_mmu_setup_early
 
-    ldr x30, =after_mmu_enable
+    ldr x30, =after_mmu_enable  /* set LR to after_mmu_enable function, it's a v_addr */
 
     mrs x1, sctlr_el1
     bic x1, x1, #(3 << 3)    /* dis SA, SA0 */
@@ -156,11 +142,13 @@ __start:
     ret
 
 after_mmu_enable:
+#if 0
     mrs x0, tcr_el1          /* disable ttbr0, only using kernel space */
     orr x0, x0, #(1 << 7)
     msr tcr_el1, x0
     msr ttbr0_el1, xzr
     dsb sy
+#endif
 
     mov     x0, #1
     msr     spsel, x0
@@ -168,3 +156,127 @@ after_mmu_enable:
     mov     sp, x1           /* sp_el1 set to _start */
 
     b  rtthread_startup
+
+#ifdef RT_USING_SMP
+/**
+ *  secondary cpu
+ */
+
+.globl _secondary_cpu_entry
+_secondary_cpu_entry:
+    bl      rt_hw_cpu_id_set
+    adr     x1, __start
+
+    /* set up EL1 */
+    mrs     x0, CurrentEL           /* CurrentEL Register. bit 2, 3. Others reserved */
+    and     x0, x0, #12             /* clear reserved bits */
+
+    /* running at EL3? */
+    cmp     x0, #12                 /* 1100b. So, EL3 */
+    bne     .L__not_in_el3_cpux          /* 11?  !EL3 -> 5: */
+
+    /* should never be executed, just for completeness. (EL3) */
+    mov     x2, #0x5b1
+    msr     scr_el3, x2             /* SCR_ELn  Secure Configuration Register */
+    mov     x2, #0x3c9
+    msr     spsr_el3, x2            /* SPSR_ELn. Saved Program Status Register. 1111001001 */
+    adr     x2, .L__not_in_el3_cpux
+    msr     elr_el3, x2
+    eret                            /* Exception Return: from EL3, continue from .L__not_in_el3 */
+
+.L__not_in_el3_cpux:                     /* running at EL2 or EL1 */
+    cmp     x0, #4                  /* 0x04  0100 EL1 */
+    beq     .L__in_el1_cpux              /* EL1 -> 5: */
+ 
+    mrs     x0, hcr_el2
+    bic     x0, x0, #0xff
+    msr     hcr_el2, x0
+
+    msr     sp_el1, x1              /* in EL2, set sp of EL1 to _start */
+
+    /* enable CNTP for EL1 */
+    mrs     x0, cnthctl_el2         /* Counter-timer Hypervisor Control register */
+    orr     x0, x0, #3
+    msr     cnthctl_el2, x0
+    msr     cntvoff_el2, xzr
+
+    /* enable AArch64 in EL1 */
+    mov     x0, #(1 << 31)          /* AArch64 */
+    orr     x0, x0, #(1 << 1)       /* SWIO hardwired on Pi3 */
+    msr     hcr_el2, x0
+    mrs     x0, hcr_el2
+
+    /* change execution level to EL1 */
+    mov     x2, #0x3c4
+    msr     spsr_el2, x2            /* 1111000100 */
+    adr     x2, .L__in_el1_cpux
+    msr     elr_el2, x2
+
+    eret                            /* exception return. from EL2. continue from .L__in_el1 */
+
+.L__in_el1_cpux:
+    adr     x19, .L__in_el1_cpux
+    ldr     x8, =.L__in_el1_cpux
+    sub     x19, x19, x8            /* get PV_OFFSET            */
+
+    mrs     x0, tpidr_el1
+    /* each cpu init stack is 8k */
+    sub     x1, x1, x0, lsl #13
+    mov     sp, x1                  /* in EL1. Set sp to _start */
+
+    /* Set CPACR_EL1 (Architecture Feature Access Control Register) to avoid trap from SIMD or float point instruction */
+    mov     x1, #0x00300000         /* Don't trap any SIMD/FP instructions in both EL0 and EL1 */
+    msr     cpacr_el1, x1
+
+.L__jump_to_entry_cpux:                   /* jump to C code, should not return */
+
+    /* init mmu early */
+
+    bl mmu_tcr_init
+
+    adr x1, __start
+    ldr x0, =~0x1fffff
+    and x0, x1, x0
+    add x1, x0, #0x1000
+
+    msr ttbr0_el1, x0
+    msr ttbr1_el1, x1
+    dsb sy
+
+    ldr x30, =after_mmu_enable_cpux  /* set LR to after_mmu_enable function, it's a v_addr */
+
+    mrs x1, sctlr_el1
+    bic x1, x1, #(3 << 3)    /* dis SA, SA0 */
+    bic x1, x1, #(1 << 1)    /* dis A */
+    orr x1, x1, #(1 << 12)   /* I */
+    orr x1, x1, #(1 << 2)    /* C */
+    orr x1, x1, #(1 << 0)    /* M */
+    msr sctlr_el1, x1        /* enable MMU */
+
+    dsb sy
+    isb sy
+    ic ialluis               /* Invalidate all instruction caches in Inner Shareable domain to Point of Unification */
+    dsb sy
+    isb sy
+    tlbi vmalle1             /* Invalidate all stage 1 translations used at EL1 with the current VMID */
+    dsb sy
+    isb sy
+    ret
+
+after_mmu_enable_cpux:
+    mrs x0, tcr_el1          /* disable ttbr0, only using kernel space */
+    orr x0, x0, #(1 << 7)
+    msr tcr_el1, x0
+    msr ttbr0_el1, xzr
+    dsb sy
+
+    mov     x0, #1
+    msr     spsel, x0
+    mrs     x0, tpidr_el1
+    /* each cpu init stack is 8k */
+    adr     x1, __start
+    sub     x1, x1, x0, lsl #13
+    mov     sp, x1                  /* in EL1. Set sp to _start */
+
+    b rt_hw_secondary_cpu_bsp_start
+#endif
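Note: both the primary and the secondary entry paths derive their early addresses from __start: the TTBR0/TTBR1 level-0 tables are taken from the first two 4 KB pages at the 2 MB-aligned base containing __start, and each core's 8 KB boot stack grows down from __start. A C sketch of that arithmetic, assuming the layout implied by the assembly above:

#include <stdint.h>

#define SECTION_MASK   0x1fffffUL   /* 2 MB block */
#define BOOT_STACK_SZ  0x2000UL     /* 8 KB per core (lsl #13 in the asm) */

static inline uintptr_t early_ttbr0(uintptr_t start)
{
    return start & ~SECTION_MASK;            /* first page: TTBR0 table  */
}

static inline uintptr_t early_ttbr1(uintptr_t start)
{
    return (start & ~SECTION_MASK) + 0x1000; /* second page: TTBR1 table */
}

static inline uintptr_t boot_sp(uintptr_t start, unsigned int cpu_id)
{
    return start - (uintptr_t)cpu_id * BOOT_STACK_SZ; /* cpu 0 uses __start itself */
}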