Browse Source

[libcpu] add c-sky ck802 support

liang yongxiang 7 years ago
parent
commit
7785dc5d01

+ 152 - 0
libcpu/c-sky/ck802/contex_ck802_gcc.S

@@ -0,0 +1,152 @@
+/*
+ * File      : contex_ck802.S
+ * This file is part of RT-Thread RTOS
+ * COPYRIGHT (C) 2006 - 2017, RT-Thread Development Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2017-01-01     Urey      first version
+ * 2018-06-05     tanek     clean code
+ */
+
+.file     "contex_ck802.S"
+
+/*
+ * Address of the register written by rt_hw_context_switch() and
+ * rt_hw_context_switch_interrupt() to request a context switch.
+ * NOTE(review): presumably the VIC "tspend" set-pending register -- confirm
+ * against the CK802 reference manual.
+ */
+#undef  VIC_TSPDR
+#define VIC_TSPDR 0XE000EC08
+
+.global  rt_thread_switch_interrupt_flag
+.global  rt_interrupt_from_thread
+.global  rt_interrupt_to_thread
+
+.text
+.align 2
+
+/*
+ * rt_base_t rt_hw_interrupt_disable(void);
+ *
+ * Copies the current PSR into r0 (the return value) and then clears the
+ * PSR interrupt-enable (IE) bit. The returned PSR is the value expected by
+ * rt_hw_interrupt_enable() to put the interrupt state back.
+ */
+.global rt_hw_interrupt_disable
+.type   rt_hw_interrupt_disable, %function
+rt_hw_interrupt_disable:
+mfcr    r0, psr                         /* r0 = PSR as it was on entry */
+psrclr  ie                              /* clear PSR.IE */
+rts
+
+/*
+ * void rt_hw_interrupt_enable(rt_base_t psr);
+ *
+ * Writes the caller-supplied value in r0 back into PSR. The whole register
+ * is overwritten, so the interrupt state becomes whatever 'psr' encodes
+ * (normally the value returned by rt_hw_interrupt_disable()).
+ */
+.global rt_hw_interrupt_enable
+.type   rt_hw_interrupt_enable, %function
+rt_hw_interrupt_enable:
+mtcr    r0, psr                         /* PSR = psr argument */
+rts
+
+/*
+ * void rt_hw_context_switch_to(rt_uint32 to);
+ * R0 --> to
+ *
+ * Switches to thread 'to' without saving any current context: stores 'to'
+ * in rt_interrupt_to_thread, zeroes rt_interrupt_from_thread, clears PSR.IE
+ * and jumps to __tspend_handler_nosave (the restore-only half of
+ * PendSV_Handler). Control leaves through that path's rte, not through rts.
+ */
+.global rt_hw_context_switch_to
+.type rt_hw_context_switch_to, %function
+rt_hw_context_switch_to:
+lrw     r2, rt_interrupt_to_thread
+stw     r0, (r2)                        /* rt_interrupt_to_thread = to */
+
+/* set from thread = 0 */
+lrw     r2, rt_interrupt_from_thread
+movi    r0, 0
+stw     r0, (r2)
+
+psrclr  ie                              /* interrupts off while the context is restored */
+jbr     __tspend_handler_nosave
+
+/*
+ * void rt_hw_context_switch_interrupt(rt_uint32 from, rt_uint32 to);
+ * r0 --> from
+ * r1 --> to
+ *
+ * Records the outgoing and incoming threads in rt_interrupt_from_thread /
+ * rt_interrupt_to_thread, then writes 1 to VIC_TSPDR and returns. No
+ * registers are saved here; the save/restore itself is done in
+ * PendSV_Handler.
+ * NOTE(review): the VIC_TSPDR write presumably pends the exception served
+ * by PendSV_Handler -- confirm against the CK802 VIC documentation.
+ * The instruction sequence is identical to rt_hw_context_switch().
+ */
+.global rt_hw_context_switch_interrupt
+.type   rt_hw_context_switch_interrupt, %function
+rt_hw_context_switch_interrupt:
+lrw    r2, rt_interrupt_from_thread     /* set rt_interrupt_from_thread */
+stw    r0, (r2)
+
+lrw    r2, rt_interrupt_to_thread       /* set rt_interrupt_to_thread */
+stw    r1, (r2)
+
+lrw    r0, VIC_TSPDR                    /* r0 = register address ('from' already stored) */
+bgeni  r1, 0                            /* r1 = 1 << 0 ('to' already stored) */
+stw    r1, (r0)                         /* VIC_TSPDR = 1 */
+rts
+
+/*
+ * void rt_hw_context_switch(rt_uint32 from, rt_uint32 to)
+ * r0 --> from
+ * r1 --> to
+ *
+ * Records the outgoing and incoming threads in rt_interrupt_from_thread /
+ * rt_interrupt_to_thread, then writes 1 to VIC_TSPDR and returns. No
+ * registers are saved here; the save/restore itself is done in
+ * PendSV_Handler.
+ * NOTE(review): the VIC_TSPDR write presumably pends the exception served
+ * by PendSV_Handler -- confirm against the CK802 VIC documentation.
+ * The instruction sequence is identical to rt_hw_context_switch_interrupt().
+ */
+.global rt_hw_context_switch
+.type   rt_hw_context_switch, %function
+rt_hw_context_switch:
+lrw    r2, rt_interrupt_from_thread     /* set rt_interrupt_from_thread */
+stw    r0, (r2)
+
+lrw    r2, rt_interrupt_to_thread       /* set rt_interrupt_to_thread */
+stw    r1, (r2)
+
+lrw    r0, VIC_TSPDR                    /* r0 = register address ('from' already stored) */
+bgeni  r1, 0                            /* r1 = 1 << 0 ('to' already stored) */
+stw    r1, (r0)                         /* VIC_TSPDR = 1 */
+rts
+
+/*
+ * PendSV_Handler
+ *
+ * Context-switch exception handler. It pushes a 68-byte frame on the
+ * current stack, stores the resulting sp through the pointer held in
+ * rt_interrupt_from_thread, and falls through to __tspend_handler_nosave.
+ * That second half loads sp through the pointer held in
+ * rt_interrupt_to_thread, pops the same frame layout and leaves with rte.
+ *
+ * Frame layout (byte offsets from sp):
+ *    0..55  r0-r13
+ *   56      r15
+ *   60      EPSR
+ *   64      EPC
+ *
+ * rt_hw_context_switch_to() jumps directly to __tspend_handler_nosave so
+ * that nothing is saved when there is no outgoing thread.
+ * NOTE(review): rt_interrupt_from_thread / rt_interrupt_to_thread are
+ * dereferenced as "address of a saved-sp slot" -- presumably &thread->sp;
+ * confirm against the callers.
+ */
+.global PendSV_Handler
+.type   PendSV_Handler, %function
+PendSV_Handler:
+subi    sp, 68                          /* reserve the frame */
+stm     r0-r13, (sp)
+stw     r15, (sp, 56)
+mfcr    r0, epsr                        /* r0 is already saved, reuse as scratch */
+stw     r0, (sp, 60)
+mfcr    r0, epc
+stw     r0, (sp, 64)
+
+lrw     r0, rt_interrupt_from_thread
+ldw     r1, (r0)                        /* r1 = slot for the outgoing thread's sp */
+stw     sp, (r1)                        /* *from = sp */
+
+__tspend_handler_nosave:
+lrw     r6, rt_interrupt_to_thread
+lrw     r7, rt_interrupt_from_thread
+ldw     r8, (r6)
+stw     r8, (r7)                        /* rt_interrupt_from_thread = rt_interrupt_to_thread */
+
+ldw     sp, (r8)                        /* sp = *to */
+
+/*
+ * Read-modify-write of cr<0, 4> that sets bits 0 and 1.
+ * NOTE(review): presumably enables the hardware stack guard -- confirm
+ * against the CK802 manual.
+ */
+#ifdef CONFIG_STACK_GUARD
+mfcr    r3, cr<0, 4>
+bseti   r3, 0
+bseti   r3, 1
+mtcr    r3, cr<0, 4>
+#endif
+
+ldw     r0, (sp, 64)
+mtcr    r0, epc
+ldw     r0, (sp, 60)
+bseti   r0, 6                           /* force bit 6 (IE in this port's PSR layout) on in the restored EPSR */
+mtcr    r0, epsr
+ldw     r15, (sp, 56)
+ldm     r0-r13, (sp)
+addi    sp, 68                          /* drop the frame */
+rte
+

+ 337 - 0
libcpu/c-sky/ck802/core_ck802.c

@@ -0,0 +1,337 @@
+/*
+ * File      : core_ck802.c
+ * This file is part of RT-Thread RTOS
+ * COPYRIGHT (C) 2006 - 2017, RT-Thread Development Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2017-01-01     Urey         first version
+ */
+
+#include <rthw.h>
+#include <rtthread.h>
+
+#include <stdint.h>
+#include <core_ck802.h>
+
+/*
+ * Context-switch bookkeeping shared with the assembly port: the assembly
+ * file declares these three symbols .global and reads/writes the from/to
+ * pair when switching threads.
+ */
+rt_uint32_t rt_interrupt_from_thread, rt_interrupt_to_thread;
+rt_uint32_t rt_thread_switch_interrupt_flag;
+
+/*******************************************************************************
+ *                Hardware Abstraction Layer
+  Core Function Interface contains:
+  - Core VIC Functions
+  - Core CORET Functions
+  - Core Register Access Functions
+ ******************************************************************************/
+/**
+  \defgroup CSI_Core_FunctionInterface Functions and Instructions Reference
+*/
+
+/* ##########################   NVIC functions  #################################### */
+/**
+  \ingroup  CSI_Core_FunctionInterface
+  \defgroup CSI_Core_NVICFunctions NVIC Functions
+  \brief    Functions that manage interrupts and exceptions via the NVIC.
+  @{
+ */
+
+/* Interrupt Priorities are WORD accessible only under CSKYv6M                   */
+/* The following MACROS handle generation of the register offset and byte masks */
+/* _BIT_SHIFT: bit offset (0, 8, 16 or 24) of IRQn's priority byte inside its 32-bit IPR word */
+#define _BIT_SHIFT(IRQn)         (  ((((uint32_t)(int32_t)(IRQn))         )      &  0x03UL) * 8UL)
+/* _IP_IDX: index of the IPR word holding IRQn's priority byte (four IRQs per word) */
+#define _IP_IDX(IRQn)            (   (((uint32_t)(int32_t)(IRQn))                >>    2UL)      )
+
+/* Number of priority bits in use; starts at __NVIC_PRIO_BITS and can be changed by drv_nvic_init(). */
+static uint32_t s_nvic_prio_bits = __NVIC_PRIO_BITS;
+
+/**
+  \brief   initialize the NVIC interrupt controller
+  \details Records how many priority bits the controller implements. The value is
+           used by drv_nvic_set_prio() and drv_nvic_get_prio() as the shift amount
+           (8 - prio_bits), so a value above 8 would wrap that subtraction and
+           produce an out-of-range shift. Such a request is ignored and the
+           previously configured value is kept.
+  \param [in]      prio_bits  the priority bits of NVIC interrupt controller (0..8).
+ */
+void drv_nvic_init(uint32_t prio_bits)
+{
+    /* Validate the requested value, not the one that is currently stored. */
+    if (prio_bits > 8U)
+    {
+        return;
+    }
+
+    s_nvic_prio_bits = prio_bits;
+}
+
+/**
+  \brief   Enable External Interrupt
+  \details Writes the bit selected by the low five bits of IRQn to ISER[0]; when
+           CONFIG_SYSTEM_SECURE is defined the same bit is also written to ISSR[0].
+  \param [in]      IRQn  External interrupt number. Value cannot be negative.
+ */
+void drv_nvic_enable_irq(int32_t IRQn)
+{
+    const uint32_t irq_bit = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+
+    NVIC->ISER[0U] = irq_bit;
+#ifdef CONFIG_SYSTEM_SECURE
+    NVIC->ISSR[0U] = irq_bit;
+#endif
+}
+
+/**
+  \brief   Disable External Interrupt
+  \details Writes the bit selected by the low five bits of IRQn to ICER[0].
+  \param [in]      IRQn  External interrupt number. Value cannot be negative.
+ */
+void drv_nvic_disable_irq(int32_t IRQn)
+{
+    const uint32_t irq_bit = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+
+    NVIC->ICER[0U] = irq_bit;
+}
+
+/**
+  \brief   Enable External Secure Interrupt
+  \details Writes the bit selected by the low five bits of IRQn to ISSR[0].
+  \param [in]      IRQn  External interrupt number. Value cannot be negative.
+ */
+void drv_nvic_enable_sirq(int32_t IRQn)
+{
+    const uint32_t irq_bit = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+
+    NVIC->ISSR[0U] = irq_bit;
+}
+
+/**
+  \brief   Get Pending Interrupt
+  \details Reads ISPR[0] once and tests the bit selected by the low five bits of IRQn.
+  \param [in]      IRQn  Interrupt number.
+  \return             0  Interrupt status is not pending.
+  \return             1  Interrupt status is pending.
+ */
+uint32_t drv_nvic_get_pending_irq(int32_t IRQn)
+{
+    const uint32_t irq_bit = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+
+    if ((NVIC->ISPR[0U] & irq_bit) != 0UL)
+    {
+        return 1UL;
+    }
+
+    return 0UL;
+}
+
+/**
+  \brief   Set Pending Interrupt
+  \details Writes the bit selected by the low five bits of IRQn to ISPR[0].
+  \param [in]      IRQn  Interrupt number. Value cannot be negative.
+ */
+void drv_nvic_set_pending_irq(int32_t IRQn)
+{
+    const uint32_t irq_bit = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+
+    NVIC->ISPR[0U] = irq_bit;
+}
+
+/**
+  \brief   Clear Pending Interrupt
+  \details Writes the bit selected by the low five bits of IRQn to ICPR[0].
+  \param [in]      IRQn  External interrupt number. Value cannot be negative.
+ */
+void drv_nvic_clear_pending_irq(int32_t IRQn)
+{
+    const uint32_t irq_bit = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+
+    NVIC->ICPR[0U] = irq_bit;
+}
+
+/**
+  \brief   Get Wake up Interrupt
+  \details Reads IWER[0] once and tests the wake-up bit selected by the low five bits of IRQn.
+  \param [in]      IRQn  Interrupt number.
+  \return             0  Interrupt is not set as wake up interrupt.
+  \return             1  Interrupt is set as wake up interrupt.
+ */
+uint32_t drv_nvic_get_wakeup_irq(int32_t IRQn)
+{
+    const uint32_t irq_bit = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+
+    if ((NVIC->IWER[0U] & irq_bit) != 0UL)
+    {
+        return 1UL;
+    }
+
+    return 0UL;
+}
+
+/**
+  \brief   Set Wake up Interrupt
+  \details Writes the bit selected by the low five bits of IRQn to IWER[0].
+  \param [in]      IRQn  Interrupt number. Value cannot be negative.
+ */
+void drv_nvic_set_wakeup_irq(int32_t IRQn)
+{
+    const uint32_t irq_bit = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+
+    NVIC->IWER[0U] = irq_bit;
+}
+
+/**
+  \brief   Clear Wake up Interrupt
+  \details Writes the bit selected by the low five bits of IRQn to IWDR[0].
+  \param [in]      IRQn  External interrupt number. Value cannot be negative.
+ */
+void drv_nvic_clear_wakeup_irq(int32_t IRQn)
+{
+    const uint32_t irq_bit = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+
+    NVIC->IWDR[0U] = irq_bit;
+}
+
+/**
+  \brief   Get Active Interrupt
+  \details Reads IABR[0] once and tests the bit selected by the low five bits of IRQn.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt status is not active.
+  \return             1  Interrupt status is active.
+  \note    IRQn must not be negative.
+ */
+uint32_t drv_nvic_get_active(int32_t IRQn)
+{
+    const uint32_t irq_bit = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+
+    if ((NVIC->IABR[0] & irq_bit) != 0UL)
+    {
+        return 1UL;
+    }
+
+    return 0UL;
+}
+
+/**
+  \brief   Set Threshold register
+  \details Writes IPTR with bit 31 set, (VectThreshold + 32) masked to 8 bits in
+           bits 8..15, and PrioThreshold masked to 2 bits in bits 6..7.
+  \param [in]      VectThreshold  specific vector threshold.
+  \param [in]      PrioThreshold  specific priority threshold.
+ */
+void drv_nvic_set_threshold(uint32_t VectThreshold, uint32_t PrioThreshold)
+{
+    const uint32_t vect_field = ((VectThreshold + 32) & 0xFF) << 8;
+    const uint32_t prio_field = (PrioThreshold & 0x3) << 6;
+
+    NVIC->IPTR = 0x80000000 | vect_field | prio_field;
+}
+
+/**
+  \brief   Set Interrupt Priority
+  \details Read-modify-write of the IPR word that holds IRQn's priority byte:
+           the byte is cleared and replaced by 'priority' shifted left by
+           (8 - configured priority bits) and truncated to 8 bits.
+  \note    The priority cannot be set for every core interrupt.
+  \param [in]      IRQn  Interrupt number.
+  \param [in]  priority  Priority to set.
+ */
+void drv_nvic_set_prio(int32_t IRQn, uint32_t priority)
+{
+    const uint32_t word_idx  = _IP_IDX(IRQn);
+    const uint32_t lane      = _BIT_SHIFT(IRQn);
+    const uint32_t prio_byte = (priority << (8U - s_nvic_prio_bits)) & (uint32_t)0xFFUL;
+    uint32_t ipr;
+
+    ipr  = NVIC->IPR[word_idx];                 /* single register read */
+    ipr &= (uint32_t)~(0xFFUL << lane);         /* drop the old priority byte */
+    ipr |= prio_byte << lane;                   /* insert the new one */
+    NVIC->IPR[word_idx] = ipr;                  /* single register write */
+}
+
+/**
+  \brief   Get Interrupt Priority
+  \details Reads the IPR word that holds IRQn's priority byte, extracts that byte
+           and shifts it right by (8 - configured priority bits).
+  \param [in]   IRQn  Interrupt number.
+  \return             Interrupt Priority.
+                      Value is aligned automatically to the implemented priority bits of the microcontroller.
+ */
+uint32_t drv_nvic_get_prio(int32_t IRQn)
+{
+    const uint32_t word_idx  = _IP_IDX(IRQn);
+    const uint32_t lane      = _BIT_SHIFT(IRQn);
+    const uint32_t prio_byte = (NVIC->IPR[word_idx] >> lane) & (uint32_t)0xFFUL;
+
+    return (uint32_t)(prio_byte >> (8U - s_nvic_prio_bits));
+}
+
+
+/*@} end of CSI_Core_NVICFunctions */
+
+/* ##################################    SysTick function  ############################################ */
+/**
+  \ingroup  CSI_Core_FunctionInterface
+  \defgroup CSI_Core_SysTickFunctions SysTick Functions
+  \brief    Functions that configure and read the core timer (CORET) used as the system tick.
+  @{
+ */
+
+
+/**
+  \brief   CORE timer Configuration
+  \details Programs the reload register with (ticks - 1), gives the timer interrupt
+           the numerically largest priority value for the configured priority bits,
+           clears the current counter value and finally writes CTRL with the
+           CLKSOURCE, TICKINT and ENABLE masks.
+  \param [in]  ticks  Number of ticks between two interrupts.
+  \param [in]  IRQn   core timer Interrupt number.
+  \return          0  Function succeeded.
+  \return          1  Function failed: (ticks - 1) does not fit in the reload field.
+ */
+uint32_t drv_coret_config(uint32_t ticks, int32_t IRQn)
+{
+    const uint32_t timer_prio = (1UL << s_nvic_prio_bits) - 1UL;
+    const uint32_t ctrl_bits  = CORET_CTRL_CLKSOURCE_Msk |
+                                CORET_CTRL_TICKINT_Msk |
+                                CORET_CTRL_ENABLE_Msk;
+
+    /* Reload value impossible (also catches ticks == 0, which wraps) */
+    if ((ticks - 1UL) > CORET_LOAD_RELOAD_Msk)
+    {
+        return (1UL);
+    }
+
+    /* Register order matters: reload, priority, counter, then enable. */
+    CORET->LOAD = (uint32_t)(ticks - 1UL);
+    drv_nvic_set_prio(IRQn, timer_prio);
+    CORET->VAL  = 0UL;
+    CORET->CTRL = ctrl_bits;
+
+    return (0UL);
+}
+
+/**
+  \brief   get CORE timer reload value
+  \return          Current content of the CORE timer LOAD (reload) register.
+ */
+uint32_t drv_coret_get_load(void)
+{
+    const uint32_t reload = CORET->LOAD;
+
+    return reload;
+}
+
+/**
+  \brief   get CORE timer counter value
+  \return          Current content of the CORE timer VAL (counter) register.
+ */
+uint32_t drv_coret_get_value(void)
+{
+    const uint32_t counter = CORET->VAL;
+
+    return counter;
+}
+
+/*@} end of CSI_Core_SysTickFunctions */
+
+#if 0
+/* ##################################### DCC function ########################################### */
+/**
+  \ingroup  CSI_Core_FunctionInterface
+  \defgroup CSI_core_DebugFunctions HAD Functions
+  \brief    Functions that access the HAD debug interface.
+  @{
+ */
+
+/**
+  \brief   HAD Send Character
+  \details Writes the low 8 bits of \p ch to the DCC DERJR register and returns.
+           The code does not poll any status flag before or after the write.
+           NOTE(review): this function sits inside an "#if 0" region and is not built.
+  \param [in]     ch  Character to transmit.
+  \returns            The value passed in \p ch, unchanged.
+ */
+uint32_t HAD_SendChar(uint32_t ch)
+{
+    DCC->DERJR = (uint8_t)ch;
+
+    return (ch);
+}
+
+
+/**
+  \brief   HAD Receive Character
+  \details Reads the JW field of the DCC EHSR register; when it is non-zero the
+           DCC DERJW register is read and returned.
+           NOTE(review): this function sits inside an "#if 0" region and is not built.
+  \return             Received character.
+  \return         -1  No character pending.
+ */
+int32_t HAD_ReceiveChar(void)
+{
+    int32_t ch = -1;                           /* no character available */
+
+    if (_FLD2VAL(DCC_EHSR_JW, DCC->EHSR))
+    {
+        ch = DCC->DERJW;
+    }
+
+    return (ch);
+}
+
+/**
+  \brief   HAD Check Character
+  \details Returns the JW field of the DCC EHSR register without consuming any data.
+           NOTE(review): this function sits inside an "#if 0" region and is not built.
+  \return          0  No character available.
+  \return          1  Character available.
+ */
+int32_t HAD_CheckChar(void)
+{
+    return _FLD2VAL(DCC_EHSR_JW, DCC->EHSR);                              /* JW field: non-zero when a character is available */
+}
+
+#endif

+ 552 - 0
libcpu/c-sky/ck802/core_ck802.h

@@ -0,0 +1,552 @@
+/*
+ * File      : core_ck802.h
+ * This file is part of RT-Thread RTOS
+ * COPYRIGHT (C) 2006 - 2017, RT-Thread Development Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2017-01-01     Urey      first version
+ */
+
+#ifndef __CORE_CK802_H_GENERIC
+#define __CORE_CK802_H_GENERIC
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*******************************************************************************
+ *                 CSI definitions
+ ******************************************************************************/
+/**
+  \ingroup Ck802
+  @{
+ */
+
+/*  CSI CK802 definitions */
+/* The combined version word packs MAIN into bits 31..16 and SUB into bits 15..0 (0x0004001E here). */
+#define __CK802_CSI_VERSION_MAIN  (0x04U)                                      /*!< [31:16] CSI HAL main version */
+#define __CK802_CSI_VERSION_SUB   (0x1EU)                                      /*!< [15:0]  CSI HAL sub version */
+#define __CK802_CSI_VERSION       ((__CK802_CSI_VERSION_MAIN << 16U) | \
+                                   __CK802_CSI_VERSION_SUB           )        /*!< CSI HAL version number */
+
+#define __CK80X                (0x02U)                                         /*!< CK80X Core */
+
+/** __FPU_USED indicates whether an FPU is used or not.
+    This core does not support an FPU at all
+*/
+#define __FPU_USED       0U
+
+/*
+ * Build-time guard: stop the build when a GNU-compatible compiler reports
+ * hardware floating point (__VFP_FP__ defined and __SOFTFP__ not defined).
+ * NOTE(review): these predefined macros look ARM-specific -- confirm the
+ * C-SKY toolchain ever defines them.
+ */
+#if defined ( __GNUC__ )
+#if defined (__VFP_FP__) && !defined(__SOFTFP__)
+#error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+#endif
+#endif
+
+#include "csi_gcc.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CORE_CK802_H_GENERIC */
+
+#ifndef __CSI_GENERIC
+
+#ifndef __CORE_CK802_H_DEPENDANT
+#define __CORE_CK802_H_DEPENDANT
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* check device defines and use defaults */
+/*
+ * Each macro below is given a default only when the device header has not
+ * already defined it. The original __CHECK_DEVICE_DEFINES guard and the
+ * per-macro #warning lines are kept commented out, so the defaults apply
+ * silently.
+ */
+//#if defined __CHECK_DEVICE_DEFINES
+#ifndef __CK802_REV
+#define __CK802_REV               0x0000U
+//#warning "__CK802_REV not defined in device header file; using default!"
+#endif
+
+#ifndef __NVIC_PRIO_BITS
+#define __NVIC_PRIO_BITS          2U
+//#warning "__NVIC_PRIO_BITS not defined in device header file; using default!"
+#endif
+
+#ifndef __Vendor_SysTickConfig
+#define __Vendor_SysTickConfig    0U
+//#warning "__Vendor_SysTickConfig not defined in device header file; using default!"
+#endif
+
+#ifndef __GSR_GCR_PRESENT
+#define __GSR_GCR_PRESENT         0U
+//#warning "__GSR_GCR_PRESENT not defined in device header file; using default!"
+#endif
+
+#ifndef __MGU_PRESENT
+#define __MGU_PRESENT             0U
+//#warning "__MGU_PRESENT not defined in device header file; using default!"
+#endif
+//#endif
+
+/* IO definitions (access restrictions to peripheral registers) */
+/**
+    \defgroup CSI_glob_defs CSI Global Defines
+
+    <strong>IO Type Qualifiers</strong> are used
+    \li to specify the access to peripheral variables.
+    \li for automatic generation of peripheral register debug information.
+*/
+/* In C++ the read-only qualifier omits 'const'; in C it is 'volatile const'. */
+#ifdef __cplusplus
+#define   __I     volatile             /*!< Defines 'read only' permissions */
+#else
+#define   __I     volatile const       /*!< Defines 'read only' permissions */
+#endif
+#define     __O     volatile             /*!< Defines 'write only' permissions */
+#define     __IO    volatile             /*!< Defines 'read / write' permissions */
+
+/* following defines should be used for structure members */
+#define     __IM     volatile const      /*!< Defines 'read only' structure member permissions */
+#define     __OM     volatile            /*!< Defines 'write only' structure member permissions */
+#define     __IOM    volatile            /*!< Defines 'read / write' structure member permissions */
+
+/*@} end of group CK802 */
+
+/*******************************************************************************
+ *                 Register Abstraction
+  Core Register contain:
+  - Core Register
+  - Core NVIC Register
+  - Core SCB Register
+  - Core SysTick Register
+ ******************************************************************************/
+/**
+  \defgroup CSI_core_register Defines and Type Definitions
+  \brief Type definitions and defines for CK80X processor based devices.
+*/
+
+/**
+  \ingroup    CSI_core_register
+  \defgroup   CSI_CORE  Status and Control Registers
+  \brief      Core Register type definitions.
+  @{
+ */
+
+/**
+  \brief  Union type to access the Processor Status Register (PSR).
+ */
+typedef union
+{
+    struct
+    {
+        uint32_t C: 1;                       /*!< bit:      0  Condition code / carry bit */
+        uint32_t _reserved0: 5;              /*!< bit:  1.. 5  Reserved */
+        uint32_t IE: 1;                      /*!< bit:      6  Interrupt enable control bit */
+        uint32_t IC: 1;                      /*!< bit:      7  Interrupt control bit */
+        uint32_t EE: 1;                      /*!< bit:      8  Exception enable control bit */
+        uint32_t MM: 1;                      /*!< bit:      9  Misaligned-access exception mask bit */
+        uint32_t _reserved1: 6;              /*!< bit: 10..15  Reserved */
+        uint32_t VEC: 8;                     /*!< bit: 16..23  Exception event vector value */
+        uint32_t _reserved2: 7;              /*!< bit: 24..30  Reserved */
+        uint32_t S: 1;                       /*!< bit:     31  Supervisor mode bit */
+    } b;                                   /*!< Structure    used for bit access */
+    uint32_t w;                            /*!< Type         used for whole-register access */
+} PSR_Type;
+
+/* PSR Register Definitions */
+/* Each *_Pos is the bit position of the field's LSB; each *_Msk is the field mask already shifted into place. */
+#define PSR_S_Pos                          31U                                            /*!< PSR: S Position */
+#define PSR_S_Msk                          (1UL << PSR_S_Pos)                             /*!< PSR: S Mask */
+
+/* VEC is declared 8 bits wide (bits 16..23) in PSR_Type, so the mask must cover 8 bits. */
+#define PSR_VEC_Pos                        16U                                            /*!< PSR: VEC Position */
+#define PSR_VEC_Msk                        (0xFFUL << PSR_VEC_Pos)                        /*!< PSR: VEC Mask */
+
+#define PSR_MM_Pos                         9U                                             /*!< PSR: MM Position */
+#define PSR_MM_Msk                         (1UL << PSR_MM_Pos)                            /*!< PSR: MM Mask */
+
+#define PSR_EE_Pos                         8U                                             /*!< PSR: EE Position */
+#define PSR_EE_Msk                         (1UL << PSR_EE_Pos)                            /*!< PSR: EE Mask */
+
+#define PSR_IC_Pos                         7U                                             /*!< PSR: IC Position */
+#define PSR_IC_Msk                         (1UL << PSR_IC_Pos)                            /*!< PSR: IC Mask */
+
+#define PSR_IE_Pos                         6U                                             /*!< PSR: IE Position */
+#define PSR_IE_Msk                         (1UL << PSR_IE_Pos)                            /*!< PSR: IE Mask */
+
+#define PSR_C_Pos                          0U                                             /*!< PSR: C Position */
+#define PSR_C_Msk                          (1UL << PSR_C_Pos)                             /*!< PSR: C Mask */
+
+/**
+  \brief  Union type to access the Cache Configuration Register (CCR, CR<18, 0>).
+ */
+typedef union
+{
+    struct
+    {
+        uint32_t MP: 1;                      /*!< bit:      0  Memory protection setting bit */
+        uint32_t _reserved0: 6;              /*!< bit:  1.. 6  Reserved */
+        uint32_t BE: 1;                      /*!< bit:      7  Endian mode */
+        uint32_t SCK: 3;                     /*!< bit:  8..10  System-to-processor clock ratio */
+        uint32_t _reserved1: 2;              /*!< bit: 11..12  Reserved */
+        uint32_t BE_V2: 1;                   /*!< bit:     13  V2 endianness */
+        uint32_t _reserved2: 18;             /*!< bit: 14..31  Reserved */
+    } b;                                   /*!< Structure    used for bit access */
+    uint32_t w;                            /*!< Type         used for whole-register access */
+} CCR_Type;
+
+/* CCR Register Definitions */
+/* Each *_Pos is the bit position of the field's LSB; each *_Msk is the field mask already shifted into place. */
+/* The BE_V2 mask is built from CCR_BE_V2_Pos (the previous CCR_ISR_Pos was never defined). */
+#define CCR_BE_V2_Pos                     13U                                            /*!< CCR: BE_V2 Position */
+#define CCR_BE_V2_Msk                     (0x1UL << CCR_BE_V2_Pos)                       /*!< CCR: BE_V2 Mask */
+
+/* SCK is declared 3 bits wide (bits 8..10) in CCR_Type, so the mask must cover 3 bits. */
+#define CCR_SCK_Pos                       8U                                             /*!< CCR: SCK Position */
+#define CCR_SCK_Msk                       (0x7UL << CCR_SCK_Pos)                         /*!< CCR: SCK Mask */
+
+#define CCR_BE_Pos                        7U                                             /*!< CCR: BE Position */
+#define CCR_BE_Msk                        (0x1UL << CCR_BE_Pos)                          /*!< CCR: BE Mask */
+
+#define CCR_MP_Pos                        0U                                             /*!< CCR: MP Position */
+#define CCR_MP_Msk                        (0x1UL << CCR_MP_Pos)                          /*!< CCR: MP Mask */
+
+/**
+  \brief  Union type to access the Cacheable and Access Permission Configuration Register (CAPR, CR<19,0>).
+ */
+typedef union
+{
+    struct
+    {
+        uint32_t X0: 1;                      /*!< bit:      0  Non-executable attribute setting bit */
+        uint32_t X1: 1;                      /*!< bit:      1  Non-executable attribute setting bit */
+        uint32_t X2: 1;                      /*!< bit:      2  Non-executable attribute setting bit */
+        uint32_t X3: 1;                      /*!< bit:      3  Non-executable attribute setting bit */
+        uint32_t X4: 1;                      /*!< bit:      4  Non-executable attribute setting bit */
+        uint32_t X5: 1;                      /*!< bit:      5  Non-executable attribute setting bit */
+        uint32_t X6: 1;                      /*!< bit:      6  Non-executable attribute setting bit */
+        uint32_t X7: 1;                      /*!< bit:      7  Non-executable attribute setting bit */
+        uint32_t AP0: 2;                     /*!< bit:  8.. 9  Access permission setting bits */
+        uint32_t AP1: 2;                     /*!< bit: 10..11  Access permission setting bits */
+        uint32_t AP2: 2;                     /*!< bit: 12..13  Access permission setting bits */
+        uint32_t AP3: 2;                     /*!< bit: 14..15  Access permission setting bits */
+        uint32_t AP4: 2;                     /*!< bit: 16..17  Access permission setting bits */
+        uint32_t AP5: 2;                     /*!< bit: 18..19  Access permission setting bits */
+        uint32_t AP6: 2;                     /*!< bit: 20..21  Access permission setting bits */
+        uint32_t AP7: 2;                     /*!< bit: 22..23  Access permission setting bits */
+        uint32_t S0: 1;                      /*!< bit:     24  Security attribute setting bit */
+        uint32_t S1: 1;                      /*!< bit:     25  Security attribute setting bit */
+        uint32_t S2: 1;                      /*!< bit:     26  Security attribute setting bit */
+        uint32_t S3: 1;                      /*!< bit:     27  Security attribute setting bit */
+        uint32_t S4: 1;                      /*!< bit:     28  Security attribute setting bit */
+        uint32_t S5: 1;                      /*!< bit:     29  Security attribute setting bit */
+        uint32_t S6: 1;                      /*!< bit:     30  Security attribute setting bit */
+        uint32_t S7: 1;                      /*!< bit:     31  Security attribute setting bit */
+    } b;                                   /*!< Structure    used for bit access */
+    uint32_t w;                            /*!< Type         used for whole-register access */
+} CAPR_Type;
+
+/* CAPR Register Definitions */
+/*
+ * Each *_Pos is the bit position of the field's LSB; each *_Msk is the field
+ * mask already shifted into place. S0..S7 and X0..X7 are 1-bit fields,
+ * AP0..AP7 are 2-bit fields, matching the CAPR_Type bit-field widths.
+ */
+#define CAPR_S7_Pos                        31U                                            /*!< CAPR: S7 Position */
+#define CAPR_S7_Msk                        (1UL << CAPR_S7_Pos)                           /*!< CAPR: S7 Mask */
+
+#define CAPR_S6_Pos                        30U                                            /*!< CAPR: S6 Position */
+#define CAPR_S6_Msk                        (1UL << CAPR_S6_Pos)                           /*!< CAPR: S6 Mask */
+
+#define CAPR_S5_Pos                        29U                                            /*!< CAPR: S5 Position */
+#define CAPR_S5_Msk                        (1UL << CAPR_S5_Pos)                           /*!< CAPR: S5 Mask */
+
+#define CAPR_S4_Pos                        28U                                            /*!< CAPR: S4 Position */
+#define CAPR_S4_Msk                        (1UL << CAPR_S4_Pos)                           /*!< CAPR: S4 Mask */
+
+#define CAPR_S3_Pos                        27U                                            /*!< CAPR: S3 Position */
+#define CAPR_S3_Msk                        (1UL << CAPR_S3_Pos)                           /*!< CAPR: S3 Mask */
+
+#define CAPR_S2_Pos                        26U                                            /*!< CAPR: S2 Position */
+#define CAPR_S2_Msk                        (1UL << CAPR_S2_Pos)                           /*!< CAPR: S2 Mask */
+
+#define CAPR_S1_Pos                        25U                                            /*!< CAPR: S1 Position */
+#define CAPR_S1_Msk                        (1UL << CAPR_S1_Pos)                           /*!< CAPR: S1 Mask */
+
+#define CAPR_S0_Pos                        24U                                            /*!< CAPR: S0 Position */
+#define CAPR_S0_Msk                        (1UL << CAPR_S0_Pos)                           /*!< CAPR: S0 Mask */
+
+#define CAPR_AP7_Pos                       22U                                            /*!< CAPR: AP7 Position */
+#define CAPR_AP7_Msk                       (0x3UL << CAPR_AP7_Pos)                        /*!< CAPR: AP7 Mask */
+
+#define CAPR_AP6_Pos                       20U                                            /*!< CAPR: AP6 Position */
+#define CAPR_AP6_Msk                       (0x3UL << CAPR_AP6_Pos)                        /*!< CAPR: AP6 Mask */
+
+#define CAPR_AP5_Pos                       18U                                            /*!< CAPR: AP5 Position */
+#define CAPR_AP5_Msk                       (0x3UL << CAPR_AP5_Pos)                        /*!< CAPR: AP5 Mask */
+
+#define CAPR_AP4_Pos                       16U                                            /*!< CAPR: AP4 Position */
+#define CAPR_AP4_Msk                       (0x3UL << CAPR_AP4_Pos)                        /*!< CAPR: AP4 Mask */
+
+#define CAPR_AP3_Pos                       14U                                            /*!< CAPR: AP3 Position */
+#define CAPR_AP3_Msk                       (0x3UL << CAPR_AP3_Pos)                        /*!< CAPR: AP3 Mask */
+
+#define CAPR_AP2_Pos                       12U                                            /*!< CAPR: AP2 Position */
+#define CAPR_AP2_Msk                       (0x3UL << CAPR_AP2_Pos)                        /*!< CAPR: AP2 Mask */
+
+#define CAPR_AP1_Pos                       10U                                            /*!< CAPR: AP1 Position */
+#define CAPR_AP1_Msk                       (0x3UL << CAPR_AP1_Pos)                        /*!< CAPR: AP1 Mask */
+
+#define CAPR_AP0_Pos                       8U                                             /*!< CAPR: AP0 Position */
+#define CAPR_AP0_Msk                       (0x3UL << CAPR_AP0_Pos)                        /*!< CAPR: AP0 Mask */
+
+#define CAPR_X7_Pos                        7U                                             /*!< CAPR: X7 Position */
+#define CAPR_X7_Msk                        (0x1UL << CAPR_X7_Pos)                         /*!< CAPR: X7 Mask */
+
+#define CAPR_X6_Pos                        6U                                             /*!< CAPR: X6 Position */
+#define CAPR_X6_Msk                        (0x1UL << CAPR_X6_Pos)                         /*!< CAPR: X6 Mask */
+
+#define CAPR_X5_Pos                        5U                                             /*!< CAPR: X5 Position */
+#define CAPR_X5_Msk                        (0x1UL << CAPR_X5_Pos)                         /*!< CAPR: X5 Mask */
+
+#define CAPR_X4_Pos                        4U                                             /*!< CAPR: X4 Position */
+#define CAPR_X4_Msk                        (0x1UL << CAPR_X4_Pos)                         /*!< CAPR: X4 Mask */
+
+#define CAPR_X3_Pos                        3U                                             /*!< CAPR: X3 Position */
+#define CAPR_X3_Msk                        (0x1UL << CAPR_X3_Pos)                         /*!< CAPR: X3 Mask */
+
+#define CAPR_X2_Pos                        2U                                             /*!< CAPR: X2 Position */
+#define CAPR_X2_Msk                        (0x1UL << CAPR_X2_Pos)                         /*!< CAPR: X2 Mask */
+
+#define CAPR_X1_Pos                        1U                                             /*!< CAPR: X1 Position */
+#define CAPR_X1_Msk                        (0x1UL << CAPR_X1_Pos)                         /*!< CAPR: X1 Mask */
+
+#define CAPR_X0_Pos                        0U                                             /*!< CAPR: X0 Position */
+#define CAPR_X0_Msk                        (0x1UL << CAPR_X0_Pos)                         /*!< CAPR: X0 Mask */
+
+/**
+  \brief  Union type for accessing the Protection Area Control Register (PACR, CR<20,0>).
+ */
+typedef union
+{
+    struct
+    {
+        uint32_t E: 1;                       /*!< bit:      0  Protection region enable setting */
+        uint32_t Size: 5;                    /*!< bit:  1.. 5  Protection region size */
+        uint32_t _reserved0: 4;              /*!< bit:  6.. 9  Reserved */
+        uint32_t base_addr: 22;              /*!< bit: 10..31  Upper bits of the protection region address */
+    } b;                                   /*!< Structure    used for bit-wise access */
+    uint32_t w;                            /*!< Type         used for whole-register access */
+} PACR_Type;
+
+/* PACR Register Definitions
+ *
+ * Every field provides a matching <FIELD>_Pos / <FIELD>_Msk pair so that it can
+ * be used with the _VAL2FLD() / _FLD2VAL() helpers. The masks were originally
+ * published with a misspelled "PACK_" prefix; those names are kept as aliases
+ * so that existing users keep compiling.
+ */
+#define PACR_BASE_ADDR_Pos                 10U                                            /*!< PACR: base_addr Position */
+#define PACR_BASE_ADDR_Msk                 (0x3FFFFFUL << PACR_BASE_ADDR_Pos)             /*!< PACR: base_addr Mask */
+#define PACK_BASE_ADDR_Msk                 PACR_BASE_ADDR_Msk                             /*!< Legacy (misspelled) alias of PACR_BASE_ADDR_Msk */
+
+#define PACR_SIZE_Pos                      1U                                             /*!< PACR: Size Position */
+#define PACR_SIZE_Msk                      (0x1FUL << PACR_SIZE_Pos)                      /*!< PACR: Size Mask */
+#define PACK_SIZE_Msk                      PACR_SIZE_Msk                                  /*!< Legacy (misspelled) alias of PACR_SIZE_Msk */
+
+#define PACR_E_Pos                         0U                                             /*!< PACR: E Position */
+#define PACR_E_Msk                         (0x1UL << PACR_E_Pos)                          /*!< PACR: E Mask */
+#define PACK_E_Msk                         PACR_E_Msk                                     /*!< Legacy (misspelled) alias of PACR_E_Msk */
+
+/**
+  \brief  Union type for accessing the Protection Region Select Register (PRSR, CR<21,0>).
+ */
+typedef union
+{
+    struct
+    {
+        uint32_t RID: 3;                     /*!< bit:  0.. 2  Protection region index */
+        uint32_t _reserved0: 29;             /*!< bit:  3..31  Reserved (3 + 29 = 32 bits, so the view overlays exactly one word) */
+    } b;                                   /*!< Structure    used for bit-wise access */
+    uint32_t w;                            /*!< Type         used for whole-register access */
+} PRSR_Type;
+
+/* PRSR Register Definitions */
+#define PRSR_RID_Pos                       0U                                            /*!< PRSR: RID Position */
+#define PRSR_RID_Msk                       (0x7UL << PRSR_RID_Pos)                       /*!< PRSR: RID Mask */
+
+/*@} end of group CSI_CORE */
+
+
+/**
+  \ingroup    CSI_core_register
+  \defgroup   CSI_NVIC Vectored Interrupt Controller (NVIC)
+  \brief      Type definitions for the NVIC Registers
+  @{
+ */
+
+/**
+  \brief Structure type for accessing the vectored interrupt controller registers.
+         The RESERVEDn arrays pad each register to the offset noted beside it.
+ */
+typedef struct
+{
+    __IOM uint32_t ISER[1U];               /*!< Offset: 0x000 (R/W)  Interrupt enable set register */
+    uint32_t RESERVED0[15U];
+    __IOM uint32_t IWER[1U];               /*!< Offset: 0x040 (R/W)  Interrupt low-power wake-up set register */
+    uint32_t RESERVED1[15U];
+    __IOM uint32_t ICER[1U];               /*!< Offset: 0x080 (R/W)  Interrupt enable clear register */
+    uint32_t RESERVED2[15U];
+    __IOM uint32_t IWDR[1U];               /*!< Offset: 0x0c0 (R/W)  Interrupt low-power wake-up clear register */
+    uint32_t RESERVED3[15U];
+    __IOM uint32_t ISPR[1U];               /*!< Offset: 0x100 (R/W)  Interrupt pending set register */
+    uint32_t RESERVED4[15U];
+    __IOM uint32_t ISSR[1U];               /*!< Offset: 0x140 (R/W)  Secure interrupt enable set register */
+    uint32_t RESERVED5[15U];
+    __IOM uint32_t ICPR[1U];               /*!< Offset: 0x180 (R/W)  Interrupt pending clear register */
+    uint32_t RESERVED6[31U];
+    __IOM uint32_t IABR[1U];               /*!< Offset: 0x200 (R/W)  Interrupt active (response) status register */
+    uint32_t RESERVED7[63U];
+    __IOM uint32_t IPR[8U];                /*!< Offset: 0x300 (R/W)  Interrupt priority set registers */
+    uint32_t RESERVED8[504U];
+    __IM  uint32_t ISR;                    /*!< Offset: 0xB00 (R/ )  Interrupt status register */
+    __IOM uint32_t IPTR;                   /*!< Offset: 0xB04 (R/W)  Interrupt priority threshold register */
+} NVIC_Type;
+
+/*@} end of group CSI_NVIC */
+
+/**
+  \ingroup  CSI_core_register
+  \defgroup CSI_SysTick     System Tick Timer (CORET)
+  \brief    Type definitions for the System Timer Registers.
+  @{
+ */
+
+/**
+  \brief  Structure type for accessing the system timer (CORET) registers.
+ */
+typedef struct
+{
+    __IOM uint32_t CTRL;                   /*!< Offset: 0x000 (R/W)  Control and status register */
+    __IOM uint32_t LOAD;                   /*!< Offset: 0x004 (R/W)  Reload value register */
+    __IOM uint32_t VAL;                    /*!< Offset: 0x008 (R/W)  Current value register */
+    __IM  uint32_t CALIB;                  /*!< Offset: 0x00C (R/ )  Calibration register */
+} CORET_Type;
+
+/* CORET Control / Status Register Definitions */
+#define CORET_CTRL_COUNTFLAG_Pos           16U                                            /*!< CORET CTRL: COUNTFLAG Position */
+#define CORET_CTRL_COUNTFLAG_Msk           (1UL << CORET_CTRL_COUNTFLAG_Pos)              /*!< CORET CTRL: COUNTFLAG Mask */
+
+#define CORET_CTRL_CLKSOURCE_Pos            2U                                            /*!< CORET CTRL: CLKSOURCE Position */
+#define CORET_CTRL_CLKSOURCE_Msk           (1UL << CORET_CTRL_CLKSOURCE_Pos)              /*!< CORET CTRL: CLKSOURCE Mask */
+
+#define CORET_CTRL_TICKINT_Pos              1U                                            /*!< CORET CTRL: TICKINT Position */
+#define CORET_CTRL_TICKINT_Msk             (1UL << CORET_CTRL_TICKINT_Pos)                /*!< CORET CTRL: TICKINT Mask */
+
+#define CORET_CTRL_ENABLE_Pos               0U                                            /*!< CORET CTRL: ENABLE Position */
+#define CORET_CTRL_ENABLE_Msk              (1UL /*<< CORET_CTRL_ENABLE_Pos*/)             /*!< CORET CTRL: ENABLE Mask */
+
+/* CORET Reload Register Definitions */
+#define CORET_LOAD_RELOAD_Pos               0U                                            /*!< CORET LOAD: RELOAD Position */
+#define CORET_LOAD_RELOAD_Msk              (0xFFFFFFUL /*<< CORET_LOAD_RELOAD_Pos*/)      /*!< CORET LOAD: RELOAD Mask */
+
+/* CORET Current Register Definitions */
+#define CORET_VAL_CURRENT_Pos               0U                                            /*!< CORET VAL: CURRENT Position */
+#define CORET_VAL_CURRENT_Msk              (0xFFFFFFUL /*<< CORET_VAL_CURRENT_Pos*/)      /*!< CORET VAL: CURRENT Mask */
+
+/* CORET Calibration Register Definitions */
+#define CORET_CALIB_NOREF_Pos               31U                                           /*!< CORET CALIB: NOREF Position */
+#define CORET_CALIB_NOREF_Msk              (1UL << CORET_CALIB_NOREF_Pos)                 /*!< CORET CALIB: NOREF Mask */
+
+#define CORET_CALIB_SKEW_Pos                30U                                           /*!< CORET CALIB: SKEW Position */
+#define CORET_CALIB_SKEW_Msk               (1UL << CORET_CALIB_SKEW_Pos)                  /*!< CORET CALIB: SKEW Mask */
+
+#define CORET_CALIB_TENMS_Pos               0U                                            /*!< CORET CALIB: TENMS Position */
+#define CORET_CALIB_TENMS_Msk              (0xFFFFFFUL /*<< CORET_CALIB_TENMS_Pos*/)      /*!< CORET CALIB: TENMS Mask */
+
+/*@} end of group CSI_SysTick */
+
+/**
+  \ingroup  CSI_core_register
+  \defgroup CSI_DCC
+  \brief    Type definitions for the DCC.
+  @{
+ */
+
+/**
+  \brief  Structure type for accessing the DCC registers.
+
+  NOTE(review): the offsets in the member comments do not match the layout as
+  declared. RESERVED0[13U] places HCR at 0x034, so EHSR (declared directly after
+  it) lands at 0x038 and the DERJW/DERJR union at 0x054, whereas the comments say
+  0x03C and 0x058. Either the comments are off by four or a reserved word is
+  missing between HCR and EHSR - confirm against the CK802 debug register map.
+ */
+typedef struct
+{
+    uint32_t RESERVED0[13U];
+    __IOM uint32_t HCR;                    /*!< Offset: 0x034 (R/W) */
+    __IM uint32_t EHSR;                    /*!< Offset: 0x03C (R/ ) */
+    uint32_t RESERVED1[6U];
+    union
+    {
+        __IM uint32_t DERJW;                 /*!< Offset: 0x058 (R/ )  Data exchange register, read by the CPU */
+        __OM uint32_t DERJR;                 /*!< Offset: 0x058 ( /W)  Data exchange register, written by the CPU */
+    };
+
+} DCC_Type;
+
+#define DCC_HCR_JW_Pos                   18U                                            /*!< DCC HCR: jw_int_en Position */
+#define DCC_HCR_JW_Msk                   (1UL << DCC_HCR_JW_Pos)                        /*!< DCC HCR: jw_int_en Mask */
+
+#define DCC_HCR_JR_Pos                   19U                                            /*!< DCC HCR: jr_int_en Position */
+#define DCC_HCR_JR_Msk                   (1UL << DCC_HCR_JR_Pos)                        /*!< DCC HCR: jr_int_en Mask */
+
+#define DCC_EHSR_JW_Pos                  1U                                             /*!< DCC EHSR: jw_vld Position */
+#define DCC_EHSR_JW_Msk                  (1UL << DCC_EHSR_JW_Pos)                       /*!< DCC EHSR: jw_vld Mask */
+
+#define DCC_EHSR_JR_Pos                  2U                                             /*!< DCC EHSR: jr_vld Position */
+#define DCC_EHSR_JR_Msk                  (1UL << DCC_EHSR_JR_Pos)                       /*!< DCC EHSR: jr_vld Mask */
+
+/*@} end of group CSI_DCC */
+
+
+/**
+  \ingroup    CSI_core_register
+  \defgroup   CSI_core_bitfield     Core register bit field macros
+  \brief      Macros for use with bit field definitions (xxx_Pos, xxx_Msk).
+  @{
+ */
+
+/**
+  \brief   Mask and shift a bit field value for use in a register bit range.
+  \param[in] field  Name of the register bit field (expands to field_Pos / field_Msk).
+  \param[in] value  Value of the bit field. It is parenthesised inside the macro so
+                    that compound expressions such as `a | b` are shifted as a whole.
+  \return           Masked and shifted value.
+*/
+#define _VAL2FLD(field, value)    (((value) << field ## _Pos) & field ## _Msk)
+
+/**
+  \brief     Mask and shift a register value to extract a bit field value.
+  \param[in] field  Name of the register bit field (expands to field_Pos / field_Msk).
+  \param[in] value  Value of register. It is parenthesised inside the macro so that
+                    compound expressions such as `a | b` are masked as a whole.
+  \return           Masked and shifted bit field value.
+*/
+#define _FLD2VAL(field, value)    (((value) & field ## _Msk) >> field ## _Pos)
+
+/*@} end of group CSI_core_bitfield */
+
+/**
+  \ingroup    CSI_core_register
+  \defgroup   CSI_core_base     Core Definitions
+  \brief      Definitions for base addresses, unions, and structures.
+  @{
+ */
+
+/* Memory mapping of CK802 Hardware */
+#define TCIP_BASE           (0xE000E000UL)                            /*!< Tightly Coupled IP Base Address */
+#define CORET_BASE          (TCIP_BASE +  0x0010UL)                   /*!< CORET Base Address */
+#define NVIC_BASE           (TCIP_BASE +  0x0100UL)                   /*!< NVIC Base Address */
+#define DCC_BASE            (0xE0011000UL)                            /*!< DCC Base Address */
+
+#define CORET               ((CORET_Type   *)     CORET_BASE  )       /*!< SysTick configuration struct */
+#define NVIC                ((NVIC_Type    *)     NVIC_BASE   )       /*!< NVIC configuration struct */
+#define DCC                 ((DCC_Type     *)     DCC_BASE    )       /*!< DCC configuration struct */
+
+/*@} */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CORE_CK802_H_DEPENDANT */
+
+#endif /* __CSI_GENERIC */

+ 55 - 0
libcpu/c-sky/ck802/stack_ck802.c

@@ -0,0 +1,55 @@
+/*
+ * File      : stack_ck802.c
+ * This file is part of RT-Thread RTOS
+ * COPYRIGHT (C) 2006 - 2017, RT-Thread Development Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2017-01-01     Urey      first version
+ */
+
+#include <rtthread.h>
+
+/**
+ * Build the initial saved-context frame on a new thread's stack.
+ *
+ * Starting one word above stack_addr and rounding down to an 8-byte boundary,
+ * 17 words are pushed in descending address order: entry point, PSR, the R15
+ * slot (texit), recognisable filler patterns for R13 down to R1, and finally
+ * the R0 slot (parameter).
+ *
+ * NOTE(review): the frame order presumably mirrors the restore sequence of the
+ * context-switch assembly - confirm against that file before reordering.
+ *
+ * @param tentry     thread entry function, stored in the topmost slot
+ * @param parameter  value stored in the R0 slot
+ * @param stack_addr address the frame is built downwards from
+ * @param texit      value stored in the R15 slot
+ *
+ * @return address of the lowest word written (the R0 slot)
+ */
+rt_uint8_t *rt_hw_stack_init(void *tentry, void *parameter, rt_uint8_t *stack_addr, void *texit)
+{
+    /* Filler values for R13, R12, ... R1, in the order they are pushed. */
+    static const rt_uint32_t reg_fill[] =
+    {
+        0x13131313L, 0x12121212L, 0x11111111L, 0x10101010L,
+        0x09090909L, 0x08080808L, 0x07070707L, 0x06060606L,
+        0x05050505L, 0x04040404L, 0x03030303L, 0x02020202L,
+        0x01010101L
+    };
+    rt_uint32_t *sp;
+    rt_uint32_t idx;
+
+    /* Step one word past stack_addr, then align the frame top down to 8 bytes. */
+    sp = (rt_uint32_t *)RT_ALIGN_DOWN((rt_uint32_t)(stack_addr + sizeof(rt_uint32_t)), 8);
+
+    *(--sp) = (rt_uint32_t)tentry;              /* Entry Point */
+    *(--sp) = (rt_uint32_t)0x80000150L;         /* PSR         */
+    *(--sp) = (rt_uint32_t)texit;               /* R15 (LR)    */
+
+    for (idx = 0; idx < sizeof(reg_fill) / sizeof(reg_fill[0]); idx++)
+    {
+        *(--sp) = reg_fill[idx];                /* R13 .. R1   */
+    }
+
+    *(--sp) = (rt_uint32_t)parameter;           /* R0 : argument */
+
+    /* The lowest written slot is the thread's initial stack pointer. */
+    return (rt_uint8_t *)sp;
+}
+

+ 248 - 0
libcpu/c-sky/common/csi_core.h

@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2017 C-SKY Microsystems Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/******************************************************************************
+ * @file     csi_core.h
+ * @brief    CSI Core Layer Header File
+ * @version  V1.0
+ * @date     02. June 2017
+ ******************************************************************************/
+
+#ifndef _CORE_H_
+#define _CORE_H_
+
+#include <stdint.h>
+#include "csi_gcc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* ##################################    NVIC function  ############################################ */
+
+/**
+  \brief   initialize the NVIC interrupt controller
+  \param [in]      prio_bits  the priority bits of NVIC interrupt controller.
+ */
+void drv_nvic_init(uint32_t prio_bits);
+
+/**
+  \brief   Enable External Interrupt
+  \details Enables a device-specific interrupt in the NVIC interrupt controller.
+  \param [in]      irq_num  External interrupt number. Value cannot be negative.
+ */
+void drv_nvic_enable_irq(int32_t irq_num);
+/**
+  \brief   Disable External Interrupt
+  \details Disables a device-specific interrupt in the NVIC interrupt controller.
+  \param [in]      irq_num  External interrupt number. Value cannot be negative.
+ */
+void drv_nvic_disable_irq(int32_t irq_num);
+
+/**
+  \brief   Get Pending Interrupt
+  \details Reads the pending register in the NVIC and returns the pending bit for the specified interrupt.
+  \param [in]      irq_num  Interrupt number.
+  \return             0  Interrupt status is not pending.
+  \return             1  Interrupt status is pending.
+ */
+uint32_t drv_nvic_get_pending_irq(int32_t irq_num);
+
+/**
+  \brief   Set Pending Interrupt
+  \details Sets the pending bit of an external interrupt.
+  \param [in]      irq_num  Interrupt number. Value cannot be negative.
+ */
+void drv_nvic_set_pending_irq(int32_t irq_num);
+
+/**
+  \brief   Clear Pending Interrupt
+  \details Clears the pending bit of an external interrupt.
+  \param [in]      irq_num  External interrupt number. Value cannot be negative.
+ */
+void drv_nvic_clear_pending_irq(int32_t irq_num);
+
+/**
+  \brief   Get Active Interrupt
+  \details Reads the active register in the NVIC and returns the active bit for the device specific interrupt.
+  \param [in]      irq_num  Device specific interrupt number.
+  \return             0  Interrupt status is not active.
+  \return             1  Interrupt status is active.
+  \note    irq_num must not be negative.
+ */
+uint32_t drv_nvic_get_active(int32_t irq_num);
+
+/**
+ \brief   Set Interrupt Priority
+ \details Sets the priority of an interrupt.
+ \note    The priority cannot be set for every core interrupt.
+ \param [in]      irq_num  Interrupt number.
+ \param [in]  priority  Priority to set.
+*/
+void drv_nvic_set_prio(int32_t irq_num, uint32_t priority);
+/**
+  \brief   Get Interrupt Priority
+  \details Reads the priority of an interrupt.
+           The interrupt number can be positive to specify an external (device specific) interrupt,
+           or negative to specify an internal (core) interrupt.
+  \param [in]   irq_num  Interrupt number.
+  \return             Interrupt Priority.
+                      Value is aligned automatically to the implemented priority bits of the microcontroller.
+ */
+uint32_t drv_nvic_get_prio(int32_t irq_num);
+
+/*@} end of CSI_Core_NVICFunctions */
+
+
+/* ##########################  Cache functions  #################################### */
+
+/**
+  \brief   Enable I-Cache
+  \details Turns on I-Cache
+  */
+void drv_icache_enable(void);
+
+/**
+  \brief   Disable I-Cache
+  \details Turns off I-Cache
+  */
+void drv_icache_disable(void);
+
+/**
+  \brief   Invalidate I-Cache
+  \details Invalidates I-Cache
+  */
+void drv_icache_invalid(void);
+
+/**
+  \brief   Enable D-Cache
+  \details Turns on D-Cache
+  \note    I-Cache also turns on.
+  */
+void drv_dcache_enable(void);
+
+/**
+  \brief   Disable D-Cache
+  \details Turns off D-Cache
+  \note    I-Cache also turns off.
+  */
+void drv_dcache_disable(void);
+
+/**
+  \brief   Invalidate D-Cache
+  \details Invalidates D-Cache
+  \note    I-Cache also invalid
+  */
+void drv_dcache_invalid(void);
+
+/**
+  \brief   Clean D-Cache
+  \details Cleans D-Cache
+  \note    I-Cache also cleans
+  */
+void drv_dcache_clean(void);
+
+/**
+  \brief   Clean & Invalidate D-Cache
+  \details Cleans and Invalidates D-Cache
+  \note    I-Cache also flush.
+  */
+void drv_dcache_clean_invalid(void);
+
+
+/**
+  \brief   D-Cache Invalidate by address
+  \details Invalidates D-Cache for the given address
+  \param[in]   addr    address (aligned to 16-byte boundary)
+  \param[in]   dsize   size of memory block (in number of bytes)
+*/
+void drv_dcache_invalid_range(uint32_t *addr, int32_t dsize);
+
+/**
+  \brief   D-Cache Clean by address
+  \details Cleans D-Cache for the given address
+  \param[in]   addr    address (aligned to 16-byte boundary)
+  \param[in]   dsize   size of memory block (in number of bytes)
+*/
+void drv_dcache_clean_range(uint32_t *addr, int32_t dsize);
+
+/**
+  \brief   D-Cache Clean and Invalidate by address
+  \details Cleans and invalidates D_Cache for the given address
+  \param[in]   addr    address (aligned to 16-byte boundary)
+  \param[in]   dsize   size of memory block (in number of bytes)
+*/
+void drv_dcache_clean_invalid_range(uint32_t *addr, int32_t dsize);
+
+/**
+  \brief   setup cacheable range Cache
+  \details setup Cache range
+  */
+void drv_cache_set_range(uint32_t index, uint32_t baseAddr, uint32_t size, uint32_t enable);
+
+/**
+  \brief   Enable cache profile
+  \details Turns on Cache profile
+  */
+void drv_cache_enable_profile(void);
+
+/**
+  \brief   Disable cache profile
+  \details Turns off Cache profile
+  */
+void drv_cache_disable_profile(void);
+/**
+  \brief   Reset cache profile
+  \details Reset Cache profile
+  */
+void drv_cache_reset_profile(void);
+
+/**
+  \brief   cache access times
+  \details Cache access times
+  \note    every 256 access add 1.
+  */
+uint32_t drv_cache_get_access_time(void);
+
+/**
+  \brief   cache miss times
+  \details Cache miss times
+  \note    every 256 miss add 1.
+  */
+uint32_t drv_cache_get_miss_time(void);
+
+/* ##################################    SysTick function  ############################################ */
+
+/**
+  \brief   CORE timer Configuration
+  \details Initializes the System Timer and its interrupt, and starts the System Tick Timer.
+           Counter is in free running mode to generate periodic interrupts.
+  \param [in]  ticks  Number of ticks between two interrupts.
+  \param [in]  irq_num   core timer Interrupt number.
+  \return          0  Function succeeded.
+  \return          1  Function failed.
+  \note    When the variable <b>__Vendor_SysTickConfig</b> is set to 1, then the
+           function <b>SysTick_Config</b> is not included. In this case, the file <b><i>device</i>.h</b>
+           must contain a vendor-specific implementation of this function.
+ */
+uint32_t drv_coret_config(uint32_t ticks, int32_t irq_num);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CORE_H_ */

+ 36 - 0
libcpu/c-sky/common/csi_gcc.h

@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2017 C-SKY Microsystems Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/******************************************************************************
+ * @file     csi_gcc.h
+ * @brief    CSI Header File for GCC.
+ * @version  V1.0
+ * @date     02. June 2017
+ ******************************************************************************/
+
+#ifndef _CSI_GCC_H_
+#define _CSI_GCC_H_
+
+#define __ASM            __asm                                      /*!< asm keyword for GNU Compiler */
+#define __INLINE         inline                                     /*!< inline keyword for GNU Compiler */
+#define __ALWAYS_INLINE  __attribute__((always_inline)) static inline
+
+#include <stdlib.h>
+#include "csi_reg.h"
+#include "csi_instr.h"
+#include "csi_simd.h"
+
+#endif /* _CSI_GCC_H_ */

+ 447 - 0
libcpu/c-sky/common/csi_instr.h

@@ -0,0 +1,447 @@
+/*
+ * Copyright (C) 2017 C-SKY Microsystems Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/******************************************************************************
+ * @file     csi_instr.h
+ * @brief    CSI Header File for instruct.
+ * @version  V1.0
+ * @date     02. June 2017
+ ******************************************************************************/
+
+#ifndef _CSI_INSTR_H_
+#define _CSI_INSTR_H_
+
+
+#define __CSI_GCC_OUT_REG(r) "=r" (r)
+#define __CSI_GCC_USE_REG(r) "r" (r)
+
+/**
+  \brief   No Operation
+  \details Emits a single `nop` instruction. The asm statement is volatile, so the
+           compiler keeps it even though it has no outputs. It can be used for code
+           alignment purposes.
+ */
+__ALWAYS_INLINE void __NOP(void)
+{
+    __ASM volatile("nop");
+}
+
+
+/**
+  \brief   Wait For Interrupt
+  \details Emits the `wait` instruction, a hint that suspends execution until one of a
+           number of events occurs. The body is identical to that of __WAIT().
+ */
+__ALWAYS_INLINE void __WFI(void)
+{
+    __ASM volatile("wait");
+}
+
+/**
+  \brief   Wait For Interrupt
+  \details Emits the `wait` instruction, a hint that suspends execution until one
+           interrupt occurs. The body is identical to that of __WFI().
+ */
+__ALWAYS_INLINE void __WAIT(void)
+{
+    __ASM volatile("wait");
+}
+
+/**
+  \brief   Doze For Interrupt
+  \details Emits the `doze` instruction, a hint that suspends execution until one
+           interrupt occurs.
+ */
+__ALWAYS_INLINE void __DOZE(void)
+{
+    __ASM volatile("doze");
+}
+
+/**
+  \brief   Stop For Interrupt
+  \details Emits the `stop` instruction, a hint that suspends execution until one
+           interrupt occurs.
+ */
+__ALWAYS_INLINE void __STOP(void)
+{
+    __ASM volatile("stop");
+}
+
+/**
+  \brief   Instruction Synchronization Barrier
+  \details Instruction Synchronization Barrier flushes the pipeline in the processor,
+           so that all instructions following the ISB are fetched from cache or memory,
+           after the instruction has been completed.
+           Implemented here as a single `sync` instruction; the "memory" clobber also
+           stops the compiler from moving memory accesses across the call.
+           __ISB(), __DSB() and __DMB() all expand to this same statement.
+ */
+__ALWAYS_INLINE void __ISB(void)
+{
+    __ASM volatile("sync"::: "memory");
+}
+
+
+/**
+  \brief   Data Synchronization Barrier
+  \details Acts as a special kind of Data Memory Barrier.
+           It completes when all explicit memory accesses before this instruction complete.
+           Implemented here as a single `sync` instruction; the "memory" clobber also
+           stops the compiler from moving memory accesses across the call.
+ */
+__ALWAYS_INLINE void __DSB(void)
+{
+    __ASM volatile("sync"::: "memory");
+}
+
+
+/**
+  \brief   Data Memory Barrier
+  \details Ensures the apparent order of the explicit memory operations before
+           and after the instruction, without ensuring their completion.
+           Implemented here as a single `sync` instruction; the "memory" clobber also
+           stops the compiler from moving memory accesses across the call.
+ */
+__ALWAYS_INLINE void __DMB(void)
+{
+    __ASM volatile("sync"::: "memory");
+}
+
+
+/**
+  \brief   Reverse byte order (32 bit)
+  \details Swaps the four bytes of a 32-bit word (byte 0 <-> byte 3, byte 1 <-> byte 2)
+           using the compiler's byte-swap builtin.
+  \param [in]    value  Word whose bytes are to be swapped
+  \return               Byte-swapped word
+ */
+__ALWAYS_INLINE uint32_t __REV(uint32_t value)
+{
+    const uint32_t swapped = __builtin_bswap32(value);
+
+    return swapped;
+}
+
+
+/**
+  \brief   Reverse byte order (16 bit)
+  \details Swaps the two bytes inside each 16-bit half of a 32-bit word. The halves
+           themselves stay where they are.
+  \param [in]    value  Word holding two halfwords
+  \return               Word with the bytes of each halfword exchanged
+ */
+__ALWAYS_INLINE uint32_t __REV16(uint32_t value)
+{
+#if (__CK80X >= 2)
+    uint32_t swapped;
+
+    /* Single-instruction form when __CK80X >= 2. */
+    __ASM volatile("revh %0, %1" : __CSI_GCC_OUT_REG(swapped) : __CSI_GCC_USE_REG(value));
+    return swapped;
+#else
+    /* Portable form: move the odd bytes down and the even bytes up. */
+    const uint32_t high_bytes = value & 0xFF00FF00U;
+    const uint32_t low_bytes  = value & 0x00FF00FFU;
+
+    return (high_bytes >> 8) | (low_bytes << 8);
+#endif
+}
+
+
+/**
+  \brief   Reverse byte order in signed short value
+  \details Exchanges the two low bytes of the argument, then converts the 16-bit
+           result to short so that it is sign-extended when returned as int32_t.
+           The upper 16 bits of the argument are ignored.
+  \param [in]    value  Value whose low halfword is byte-swapped
+  \return               Sign-extended, byte-swapped halfword
+ */
+__ALWAYS_INLINE int32_t __REVSH(int32_t value)
+{
+    const int32_t upper_to_lower = (value & 0xFF00) >> 8;
+    const int32_t lower_to_upper = (value & 0x00FF) << 8;
+
+    return (short)(upper_to_lower | lower_to_upper);
+}
+
+
+/**
+  \brief   Rotate Right in unsigned value (32 bit)
+  \details Rotates a 32-bit value right by a variable number of bits. The rotate
+           count is reduced modulo 32, and a resulting count of zero returns the
+           operand unchanged; this avoids shifting a 32-bit value by 32, which is
+           undefined behaviour in C.
+  \param [in]    op1  Value to rotate
+  \param [in]    op2  Number of Bits to rotate (any value; used modulo 32)
+  \return               Rotated value
+ */
+__ALWAYS_INLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
+{
+    op2 %= 32U;
+
+    if (op2 == 0U)
+    {
+        return op1;
+    }
+
+    return (op1 >> op2) | (op1 << (32U - op2));
+}
+
+
+/**
+  \brief   Breakpoint
+  \details Emits the `bkpt` instruction, which causes the processor to enter Debug state.
+           Debug tools can use this to investigate system state when the instruction at a particular address is reached.
+           Declared with an explicit (void) parameter list so that it is a proper
+           prototype in C (an empty list declares unspecified parameters).
+ */
+__ALWAYS_INLINE void __BKPT(void)
+{
+    __ASM volatile("bkpt");
+}
+
+/**
+  \brief   Reverse bit order of value
+  \details Mirrors the 32 bits of the argument (bit 0 <-> bit 31, bit 1 <-> bit 30, ...).
+  \param [in]    value  Value to reverse
+  \return               Reversed value
+ */
+__ALWAYS_INLINE uint32_t __RBIT(uint32_t value)
+{
+    uint32_t reversed;
+
+#if       (__CK80X >= 0x03U)
+    /* Single-instruction form when __CK80X >= 3. */
+    __ASM volatile("brev %0, %1" : "=r"(reversed) : "r"(value));
+#else
+    int32_t pad = 4 /*sizeof(value)*/ * 8 - 1;  /* shift still owed once the input runs out */
+
+    reversed = value;                    /* seed with the input so its LSB is already in place */
+    value >>= 1U;
+
+    /* Feed the remaining input bits, lowest first, into the bottom of the result. */
+    while (value)
+    {
+        reversed <<= 1U;
+        reversed |= value & 1U;
+        pad--;
+        value >>= 1U;
+    }
+
+    reversed <<= pad;                    /* account for the input's leading zero bits */
+#endif
+    return (reversed);
+}
+
+
+/**
+  \brief   Count leading zeros
+  \details Counts the number of leading zeros of a data value.
+           __builtin_clz() has an undefined result for an argument of zero, so zero
+           is handled explicitly and reported as 32 leading zeros.
+  \param [in]  value  Value to count the leading zeros
+  \return             number of leading zeros in value (0..32)
+ */
+__ALWAYS_INLINE int __CLZ(uint32_t value)
+{
+    return (value == 0U) ? 32 : __builtin_clz(value);
+}
+/**
+  \brief   Signed Saturate
+  \details Saturates a signed value to the range representable in y bits,
+           i.e. [-2^(y-1), 2^(y-1) - 1].
+           The limits are computed with an unsigned shift instead of repeated signed
+           doubling, so y == 32 no longer overflows a signed integer, and a bit
+           position outside [1..32] returns x unchanged instead of looping.
+  \param [in]    x   Value to be saturated
+  \param [in]    y   Bit position to saturate to [1..32]
+  \return            Saturated value.
+ */
+__ALWAYS_INLINE int32_t __SSAT(int32_t x, uint32_t y)
+{
+    int32_t posMax, negMin;
+
+    if ((y >= 1U) && (y <= 32U))
+    {
+        posMax = (int32_t)(((uint32_t)1U << (y - 1U)) - 1U);   /*  2^(y-1) - 1 */
+        negMin = -1 - posMax;                                  /* -2^(y-1)     */
+
+        if (x > posMax)
+        {
+            return posMax;
+        }
+
+        if (x < negMin)
+        {
+            return negMin;
+        }
+    }
+
+    return (x);
+}
+
+/**
+  \brief   Unsigned Saturate
+  \details Saturates an unsigned value to the range [0, 2^sat - 1].
+           The limit is computed directly, so sat == 0 (limit 0) no longer shifts a
+           32-bit value by 32, and sat >= 32 returns the value unchanged because
+           every uint32_t already fits.
+  \param [in]  value  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (0..31)
+  \return             Saturated value
+ */
+__ALWAYS_INLINE uint32_t __USAT(uint32_t value, uint32_t sat)
+{
+    uint32_t limit;
+
+    if (sat >= 32U)
+    {
+        return (value);
+    }
+
+    limit = ((uint32_t)1U << sat) - 1U;
+
+    return (value > limit) ? limit : value;
+}
+
+/**
+  \brief   Unsigned Saturate for internal use
+  \details Saturates an unsigned value, should not call directly.
+           A value with bit 31 set is treated as an overflowed (negative) result and
+           saturates to 0; otherwise the value is limited to [0, 2^sat - 1].
+           The limit is computed directly, so sat == 0 no longer shifts a 32-bit
+           value by 32, and sat >= 32 returns the value unchanged.
+  \param [in]  value  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (0..31)
+  \return             Saturated value
+ */
+__ALWAYS_INLINE uint32_t __IUSAT(uint32_t value, uint32_t sat)
+{
+    uint32_t limit;
+
+    if (value & 0x80000000U)   /* only overflow set bit-31 */
+    {
+        return 0U;
+    }
+
+    if (sat >= 32U)
+    {
+        return (value);
+    }
+
+    limit = ((uint32_t)1U << sat) - 1U;
+
+    return (value > limit) ? limit : value;
+}
+
+/**
+  \brief   Rotate Right with Extend
+  \details This function moves each bit of a bitstring right by one bit.
+           The carry input is shifted in at the left end of the bitstring.
+           Both variants follow the same outline: build a constant with only bit 31
+           set in a scratch register (t0 or r7, listed as clobbered), shift op1 right
+           by one with `lsri`, conditionally replace res (initialised to 0) with the
+           bit-31 constant (`movt`, or `bf` skipping a `mov`), then OR the shifted
+           op1 into res.
+  \note    carry input will always 0.
+           NOTE(review): the conditional step depends on the condition flag at that
+           point; whether `lsri` updates it on this core cannot be told from this
+           file - confirm against the instruction set manual.
+  \param [in]    op1  Value to rotate
+  \return               Rotated value
+ */
+__ALWAYS_INLINE uint32_t __RRX(uint32_t op1)
+{
+#if (__CK80X >= 2)
+    uint32_t res = 0;
+    __ASM volatile("bgeni    t0, 31\n\t"
+                   "lsri     %0, 1\n\t"
+                   "movt     %1, t0\n\t"
+                   "or       %1, %1, %0\n\t"
+                   : "=r"(op1), "=r"(res): "0"(op1), "1"(res): "t0");
+    return res;
+#else
+    uint32_t res = 0;
+    __ASM volatile("movi     r7, 0\n\t"
+                   "bseti    r7, 31\n\t"
+                   "lsri     %0, 1\n\t"
+                   "bf       1f\n\t"
+                   "mov     %1, r7\n\t"
+                   "1:\n\t"
+                   "or       %1, %1, %0\n\t"
+                   : "=r"(op1), "=r"(res): "0"(op1), "1"(res): "r7");
+    return res;
+#endif
+}
+
+/**
+  \brief   LDRT Unprivileged (8 bit)
+  \details Loads one byte from addr with a `ldb` instruction.
+           The asm carries a "memory" clobber, matching the __STR*T stores, so the
+           compiler cannot reorder other memory accesses around the load.
+  \param [in]    addr  Pointer to location
+  \return             value of type uint8_t at (*addr)
+ */
+__ALWAYS_INLINE uint8_t __LDRBT(volatile uint8_t *addr)
+{
+    uint32_t result;
+
+    __ASM volatile("ldb %0, (%1, 0)" : "=r"(result) : "r"(addr) : "memory");
+    return ((uint8_t) result);    /* Add explicit type cast here */
+}
+
+
+/**
+  \brief   LDRT Unprivileged (16 bit)
+  \details Loads one halfword from addr with a `ldh` instruction.
+           The asm carries a "memory" clobber, matching the __STR*T stores, so the
+           compiler cannot reorder other memory accesses around the load.
+  \param [in]    addr  Pointer to location
+  \return        value of type uint16_t at (*addr)
+ */
+__ALWAYS_INLINE uint16_t __LDRHT(volatile uint16_t *addr)
+{
+    uint32_t result;
+
+    __ASM volatile("ldh %0, (%1, 0)" : "=r"(result) : "r"(addr) : "memory");
+    return ((uint16_t) result);    /* Add explicit type cast here */
+}
+
+
+/**
+  \brief   LDRT Unprivileged (32 bit)
+  \details Loads one word from addr with a `ldw` instruction.
+           The asm carries a "memory" clobber, matching the __STR*T stores, so the
+           compiler cannot reorder other memory accesses around the load.
+  \param [in]    addr  Pointer to location
+  \return        value of type uint32_t at (*addr)
+ */
+__ALWAYS_INLINE uint32_t __LDRT(volatile uint32_t *addr)
+{
+    uint32_t result;
+
+    __ASM volatile("ldw %0, (%1, 0)" : "=r"(result) : "r"(addr) : "memory");
+    return (result);
+}
+
+
+/**
+  \brief   STRT Unprivileged (8 bit)
+  \details Stores the low byte of value to addr with a `stb` instruction. The value is
+           widened to 32 bits for the register operand, and the "memory" clobber
+           tells the compiler that memory is modified.
+  \param [in]  value  Value to store
+  \param [in]    addr  Pointer to location
+ */
+__ALWAYS_INLINE void __STRBT(uint8_t value, volatile uint8_t *addr)
+{
+    __ASM volatile("stb %1, (%0, 0)" :: "r"(addr), "r"((uint32_t)value) : "memory");
+}
+
+
+/**
+  \brief   STRT Unprivileged (16 bit)
+  \details Stores the low halfword of value to addr with a `sth` instruction. The value
+           is widened to 32 bits for the register operand, and the "memory" clobber
+           tells the compiler that memory is modified.
+  \param [in]  value  Value to store
+  \param [in]    addr  Pointer to location
+ */
+__ALWAYS_INLINE void __STRHT(uint16_t value, volatile uint16_t *addr)
+{
+    __ASM volatile("sth %1, (%0, 0)" :: "r"(addr), "r"((uint32_t)value) : "memory");
+}
+
+
+/**
+  \brief   STRT Unprivileged (32 bit)
+  \details Stores value to addr with a `stw` instruction. The "memory" clobber tells
+           the compiler that memory is modified.
+  \param [in]  value  Value to store
+  \param [in]    addr  Pointer to location
+ */
+__ALWAYS_INLINE void __STRT(uint32_t value, volatile uint32_t *addr)
+{
+    __ASM volatile("stw %1, (%0, 0)" :: "r"(addr), "r"(value) : "memory");
+}
+
+/*@}*/ /* end of group CSI_Core_InstructionInterface */
+
+
+/* ##########################  FPU functions  #################################### */
+
+/**
+  \brief   get FPU type
+  \details returns the FPU type, always 0.
+           The function still executes an `mfcr` read of cr<13, 0> into a local, but
+           that value is never used: the return value is the constant 0.
+  \returns
+   - \b  0: No FPU
+   - \b  1: Single precision FPU
+   - \b  2: Double + Single precision FPU
+ */
+__ALWAYS_INLINE uint32_t __get_FPUType(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mfcr %0, cr<13, 0>" : "=r"(result));   /* value read here is discarded */
+    return 0;
+}
+
+
+#endif /* _CSI_INSTR_H_ */

+ 366 - 0
libcpu/c-sky/common/csi_reg.h

@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2017 C-SKY Microsystems Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/******************************************************************************
+ * @file     csi_reg.h
+ * @brief    CSI Header File for reg.
+ * @version  V1.0
+ * @date     02. June 2017
+ ******************************************************************************/
+
+#ifndef _CSI_REG_H_
+#define _CSI_REG_H_
+
+#include<csi_gcc.h>
+
+/**
+  \brief   Enable IRQ Interrupts
+  \details Enables IRQ interrupts by setting the IE-bit in the PSR.
+           Can only be executed in Privileged modes.
+           The statement is also a compiler-level memory barrier: memory
+           accesses written before the call are not moved after it, so data
+           updated while interrupts were masked is stored before they are
+           enabled again.
+ */
+__ALWAYS_INLINE void __enable_irq(void)
+{
+    /* No operands; "memory" keeps the compiler from reordering loads and
+       stores across the change of the interrupt mask. */
+    __ASM volatile("psrset ie" : : : "memory");
+}
+
+
+
+/**
+  \brief   Disable IRQ Interrupts
+  \details Disables IRQ interrupts by clearing the IE-bit in the PSR.
+           Can only be executed in Privileged modes.
+           The statement is also a compiler-level memory barrier: memory
+           accesses written after the call are not moved before it, so the
+           code that follows really runs with interrupts masked.
+ */
+__ALWAYS_INLINE void __disable_irq(void)
+{
+    /* No operands; "memory" keeps the compiler from reordering loads and
+       stores across the change of the interrupt mask. */
+    __ASM volatile("psrclr ie" : : : "memory");
+}
+
+/**
+  \brief   Get PSR
+  \details Returns the content of the PSR Register, read with one `mfcr`
+           instruction into a compiler-chosen general register.
+  \return               PSR Register value
+ */
+__ALWAYS_INLINE uint32_t __get_PSR(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mfcr %0, psr" : "=r"(result));  /* result <- psr */
+    return (result);
+}
+
+/**
+  \brief   Set PSR
+  \details Writes the given value to the PSR Register with one `mtcr`
+           instruction.
+           NOTE(review): the statement has no "memory" clobber, so the
+           compiler may move memory accesses across it; confirm that callers
+           using it to change the interrupt mask do not rely on ordering.
+  \param [in]    psr  PSR Register value to set
+ */
+__ALWAYS_INLINE void __set_PSR(uint32_t psr)
+{
+    __ASM volatile("mtcr %0, psr" : : "r"(psr));  /* psr <- argument */
+}
+
+/**
+  \brief   Get SP
+  \details Returns the content of the SP Register, copied with a `mov` at the
+           point where this always-inline function is expanded.
+  \return               SP Register value
+ */
+__ALWAYS_INLINE uint32_t __get_SP(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mov %0, sp" : "=r"(result));  /* result <- sp */
+    return (result);
+}
+
+/**
+  \brief   Set SP
+  \details Writes the given value to the SP Register with a `mov`.
+           "sp" is listed as a clobber so the compiler knows the register is
+           modified by the statement.
+  \param [in]    sp  SP Register value to set
+ */
+__ALWAYS_INLINE void __set_SP(uint32_t sp)
+{
+    __ASM volatile("mov sp, %0" : : "r"(sp): "sp");  /* sp <- argument */
+}
+
+
+/**
+  \brief   Get VBR Register
+  \details Returns the content of the VBR Register, read with one `mfcr`
+           instruction.
+  \return               VBR Register value
+ */
+__ALWAYS_INLINE uint32_t __get_VBR(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mfcr %0, vbr" : "=r"(result));  /* result <- vbr */
+    return (result);
+}
+
+/**
+  \brief   Set VBR
+  \details Writes the given value to the VBR Register with one `mtcr`
+           instruction.
+  \param [in]    vbr  VBR Register value to set
+ */
+__ALWAYS_INLINE void __set_VBR(uint32_t vbr)
+{
+    __ASM volatile("mtcr %0, vbr" : : "r"(vbr));  /* vbr <- argument */
+}
+
+/**
+  \brief   Get EPC Register
+  \details Returns the content of the EPC Register, read with one `mfcr`
+           instruction.
+  \return               EPC Register value
+ */
+__ALWAYS_INLINE uint32_t __get_EPC(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mfcr %0, epc" : "=r"(result));  /* result <- epc */
+    return (result);
+}
+
+/**
+  \brief   Set EPC
+  \details Writes the given value to the EPC Register with one `mtcr`
+           instruction.
+  \param [in]    epc  EPC Register value to set
+ */
+__ALWAYS_INLINE void __set_EPC(uint32_t epc)
+{
+    __ASM volatile("mtcr %0, epc" : : "r"(epc));  /* epc <- argument */
+}
+
+/**
+  \brief   Get EPSR
+  \details Returns the content of the EPSR Register, read with one `mfcr`
+           instruction.
+  \return               EPSR Register value
+ */
+__ALWAYS_INLINE uint32_t __get_EPSR(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mfcr %0, epsr" : "=r"(result));  /* result <- epsr */
+    return (result);
+}
+
+/**
+  \brief   Set EPSR
+  \details Writes the given value to the EPSR Register with one `mtcr`
+           instruction.
+  \param [in]    epsr  EPSR Register value to set
+ */
+__ALWAYS_INLINE void __set_EPSR(uint32_t epsr)
+{
+    __ASM volatile("mtcr %0, epsr" : : "r"(epsr));  /* epsr <- argument */
+}
+
+/**
+  \brief   Get CPUID Register
+  \details Returns the content of the CPUID Register, which this function
+           reads as control register cr<13, 0> with one `mfcr` instruction.
+  \return               CPUID Register value
+ */
+__ALWAYS_INLINE uint32_t __get_CPUID(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mfcr %0, cr<13, 0>" : "=r"(result));  /* result <- cr<13, 0> */
+    return (result);
+}
+
+#if       (__SOFTRESET_PRESENT == 1U)
+/**
+  \brief   Set SRCR
+  \details Assigns the given value to the SRCR, written here as control
+           register cr<31, 0>. Only compiled when __SOFTRESET_PRESENT == 1U.
+  \param [in]    srcr  SRCR value to set
+ */
+__ALWAYS_INLINE void __set_SRCR(uint32_t srcr)
+{
+    __ASM volatile("mtcr %0, cr<31, 0>\n" : : "r"(srcr));  /* cr<31, 0> <- argument */
+}
+#endif /* __SOFTRESET_PRESENT == 1U */
+
+#if       (__MGU_PRESENT == 1U)
+/**
+  \brief   Get CCR
+  \details Returns the current value of the CCR, read here as control
+           register cr<18, 0>. Only compiled when __MGU_PRESENT == 1U.
+  \return               CCR Register value
+ */
+__ALWAYS_INLINE uint32_t __get_CCR(void)
+{
+    register uint32_t result;
+
+    __ASM volatile("mfcr %0, cr<18, 0>\n"  : "=r"(result));  /* result <- cr<18, 0> */
+    return (result);
+}
+
+
+/**
+  \brief   Set CCR
+  \details Assigns the given value to the CCR, written here as control
+           register cr<18, 0>. Only compiled when __MGU_PRESENT == 1U.
+  \param [in]    ccr  CCR value to set
+ */
+__ALWAYS_INLINE void __set_CCR(uint32_t ccr)
+{
+    __ASM volatile("mtcr %0, cr<18, 0>\n" : : "r"(ccr));  /* cr<18, 0> <- argument */
+}
+
+
+/**
+  \brief   Get CAPR
+  \details Returns the current value of the CAPR, read here as control
+           register cr<19, 0>. Only compiled when __MGU_PRESENT == 1U.
+  \return               CAPR Register value
+ */
+__ALWAYS_INLINE uint32_t __get_CAPR(void)
+{
+    register uint32_t result;
+
+    __ASM volatile("mfcr %0, cr<19, 0>\n" : "=r"(result));  /* result <- cr<19, 0> */
+    return (result);
+}
+
+/**
+  \brief   Set CAPR
+  \details Assigns the given value to the CAPR, written here as control
+           register cr<19, 0>. Only compiled when __MGU_PRESENT == 1U.
+  \param [in]    capr  CAPR value to set
+ */
+__ALWAYS_INLINE void __set_CAPR(uint32_t capr)
+{
+    __ASM volatile("mtcr %0, cr<19, 0>\n" : : "r"(capr));  /* cr<19, 0> <- argument */
+}
+
+
+/**
+  \brief   Set PACR
+  \details Assigns the given value to the PACR, written here as control
+           register cr<20, 0>. Only compiled when __MGU_PRESENT == 1U.
+
+    \param [in]    pacr  PACR value to set
+ */
+__ALWAYS_INLINE void __set_PACR(uint32_t pacr)
+{
+    __ASM volatile("mtcr %0, cr<20, 0>\n" : : "r"(pacr));  /* cr<20, 0> <- argument */
+}
+
+
+/**
+  \brief   Get PACR
+  \details Returns the current value of PACR, read here as control register
+           cr<20, 0>. Only compiled when __MGU_PRESENT == 1U.
+  \return               PACR value
+ */
+__ALWAYS_INLINE uint32_t __get_PACR(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mfcr %0, cr<20, 0>" : "=r"(result));  /* result <- cr<20, 0> */
+    return (result);
+}
+
+/**
+  \brief   Set PRSR
+  \details Assigns the given value to the PRSR, written here as control
+           register cr<21, 0>. Only compiled when __MGU_PRESENT == 1U.
+
+    \param [in]    prsr  PRSR value to set
+ */
+__ALWAYS_INLINE void __set_PRSR(uint32_t prsr)
+{
+    __ASM volatile("mtcr %0, cr<21, 0>\n" : : "r"(prsr));  /* cr<21, 0> <- argument */
+}
+
+/**
+  \brief   Get PRSR
+  \details Returns the current value of PRSR, read here as control register
+           cr<21, 0>. Only compiled when __MGU_PRESENT == 1U.
+  \return               PRSR value
+ */
+__ALWAYS_INLINE uint32_t __get_PRSR(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mfcr %0, cr<21, 0>" : "=r"(result));  /* result <- cr<21, 0> */
+    return (result);
+}
+#endif /* __MGU_PRESENT == 1U */
+
+/**
+  \brief   Get user sp
+  \details Returns the current value of user r14, read here as control
+           register cr<14, 1> with one `mfcr` instruction.
+  \return               UR14 value
+ */
+__ALWAYS_INLINE uint32_t __get_UR14(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mfcr %0, cr<14, 1>" : "=r"(result));  /* result <- cr<14, 1> */
+    return (result);
+}
+
+/**
+  \brief   Enable interrupts and exceptions
+  \details Enables interrupts and exceptions by setting the IE-bit and EE-bit in the PSR.
+           Can only be executed in Privileged modes.
+           The statement is also a compiler-level memory barrier: memory
+           accesses written before the call are not moved after it.
+ */
+__ALWAYS_INLINE void __enable_excp_irq(void)
+{
+    /* No operands; "memory" keeps the compiler from reordering loads and
+       stores across the change of the interrupt/exception mask. */
+    __ASM volatile("psrset ee, ie" : : : "memory");
+}
+
+
+/**
+  \brief   Disable interrupts and exceptions
+  \details Disables interrupts and exceptions by clearing the IE-bit and EE-bit in the PSR.
+           Can only be executed in Privileged modes.
+           The statement is also a compiler-level memory barrier: memory
+           accesses written after the call are not moved before it.
+ */
+__ALWAYS_INLINE void __disable_excp_irq(void)
+{
+    /* No operands; "memory" keeps the compiler from reordering loads and
+       stores across the change of the interrupt/exception mask. */
+    __ASM volatile("psrclr ee, ie" : : : "memory");
+}
+
+#if       (__GSR_GCR_PRESENT == 1U)
+/**
+  \brief   Get GSR
+  \details Returns the content of the GSR Register, read here as control
+           register cr<12, 0>. Only compiled when __GSR_GCR_PRESENT == 1U.
+  \return               GSR Register value
+ */
+__ALWAYS_INLINE uint32_t __get_GSR(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mfcr %0, cr<12, 0>" : "=r"(result));  /* result <- cr<12, 0> */
+    return (result);
+}
+
+/**
+  \brief   Get GCR
+  \details Returns the content of the GCR Register, read here as control
+           register cr<11, 0>. Only compiled when __GSR_GCR_PRESENT == 1U.
+  \return               GCR Register value
+ */
+__ALWAYS_INLINE uint32_t __get_GCR(void)
+{
+    uint32_t result;
+
+    __ASM volatile("mfcr %0, cr<11, 0>" : "=r"(result));  /* result <- cr<11, 0> */
+    return (result);
+}
+
+/**
+  \brief   Set GCR
+  \details Writes the given value to the GCR Register, written here as
+           control register cr<11, 0>. Only compiled when
+           __GSR_GCR_PRESENT == 1U.
+  \param [in]    gcr  GCR Register value to set
+ */
+__ALWAYS_INLINE void __set_GCR(uint32_t gcr)
+{
+    __ASM volatile("mtcr %0, cr<11, 0>" : : "r"(gcr));  /* cr<11, 0> <- argument */
+}
+
+#endif /* (__GSR_GCR_PRESENT == 1U) */
+
+
+#endif /* _CSI_REG_H_ */

+ 1483 - 0
libcpu/c-sky/common/csi_simd.h

@@ -0,0 +1,1483 @@
+/*
+ * Copyright (C) 2017 C-SKY Microsystems Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/******************************************************************************
+ * @file     csi_simd.h
+ * @brief    CSI Single Instruction Multiple Data (SIMD) Header File for GCC.
+ * @version  V1.0
+ * @date     02. June 2017
+ ******************************************************************************/
+
+#ifndef _CSI_SIMD_H_
+#define _CSI_SIMD_H_
+
+/**
+  \brief   Halfword packing instruction. Combines bits[15:0] of val1 with bits[31:16]
+           of val2 left-shifted by val3.
+  \details Builds one word from two halfwords: the low halfword is taken from val1
+           unchanged, the high halfword is taken from val2 after it has been
+           shifted left by val3.
+  \param [in]    val1   word supplying the low halfword
+  \param [in]    val2   word that is shifted and supplies the high halfword
+  \param [in]    val3   left-shift amount applied to val2. Value range [0..31].
+  \return               the combination of halfwords.
+  \remark
+                 res[15:0]  = val1[15:0]                \n
+                 res[31:16] = (val2 << val3)[31:16]
+ */
+__ALWAYS_INLINE uint32_t __PKHBT(uint32_t val1, uint32_t val2, uint32_t val3)
+{
+    const int32_t bottom = (int32_t)val1 & (int32_t)0x0000FFFF;
+    const int32_t top    = ((int32_t)val2 << val3) & (int32_t)0xFFFF0000;
+
+    return (bottom | top);
+}
+
+/**
+  \brief   Halfword packing instruction. Combines bits[31:16] of val1 with bits[15:0]
+           of val2 right-shifted by val3.
+  \details Builds one word from two halfwords: the high halfword is taken from val1
+           unchanged, the low halfword is taken from val2 after it has been
+           shifted right by val3. The shift is performed on val2 as a signed
+           32-bit value.
+  \param [in]    val1   word supplying the high halfword
+  \param [in]    val2   word that is shifted and supplies the low halfword
+  \param [in]    val3   right-shift amount applied to val2. Value range [1..32].
+  \return               the combination of halfwords.
+  \remark
+                 res[15:0]  = (val2 >> val3)[15:0]      \n
+                 res[31:16] = val1[31:16]
+ */
+__ALWAYS_INLINE uint32_t __PKHTB(uint32_t val1, uint32_t val2, uint32_t val3)
+{
+    const int32_t top    = (int32_t)val1 & (int32_t)0xFFFF0000;
+    const int32_t bottom = ((int32_t)val2 >> val3) & (int32_t)0x0000FFFF;
+
+    return (top | bottom);
+}
+
+/**
+  \brief   Dual 16-bit signed saturate.
+  \details Sign-extends each halfword of x, saturates it with __SSAT() to the
+           bit position y, and packs the two 16-bit results back into one word.
+  \param [in]    x   two signed 16-bit values to be saturated.
+  \param [in]    y   bit position for saturation, an integral constant expression in the range 1 to 16.
+  \return        the signed saturation of the low halfword of x in the low halfword of the return value.\n
+                 the signed saturation of the high halfword of x in the high halfword of the return value.
+ */
+__ALWAYS_INLINE uint32_t __SSAT16(int32_t x, const uint32_t y)
+{
+    const int32_t low_half  = (int32_t)(int16_t)x;   /* sign-extended x[15:0]  */
+    const int32_t high_half = x >> 16;               /* sign-extended x[31:16] */
+    const int32_t low_sat   = __SSAT(low_half, y) & (int32_t)0x0000FFFF;
+    const int32_t high_sat  = __SSAT(high_half, y) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((high_sat << 16) | low_sat));
+}
+
+/**
+  \brief   Dual 16-bit unsigned saturate.
+  \details Passes each halfword of x to __IUSAT() with the bit position y and
+           packs the two 16-bit results back into one word.
+           NOTE(review): the halfwords are zero-extended (not sign-extended)
+           before saturation, so a halfword with bit 15 set is presented as a
+           positive value; confirm this is the intended treatment of
+           "signed" inputs.
+  \param [in]    x   two 16-bit values to be saturated.
+  \param [in]    y   bit position for saturation, an integral constant expression in the range 1 to 16.
+  \return        the saturation of the low halfword of x in the low halfword of the return value.\n
+                 the saturation of the high halfword of x in the high halfword of the return value.
+ */
+__ALWAYS_INLINE uint32_t __USAT16(uint32_t x, const uint32_t y)
+{
+    const uint32_t low_half  = x & 0xFFFFU;   /* x[15:0]  */
+    const uint32_t high_half = x >> 16;       /* x[31:16] */
+    const int32_t  low_sat   = __IUSAT(low_half, y) & 0x0000FFFF;
+    const int32_t  high_sat  = __IUSAT(high_half, y) & 0x0000FFFF;
+
+    return ((high_sat << 16) | low_sat);
+}
+
+/**
+  \brief   Quad 8-bit saturating addition.
+  \details Adds the four signed byte lanes of x and y pairwise and saturates each
+           sum with __SSAT(..., 8), i.e. to the 8-bit signed range
+           -2^7 <= x <= 2^7 - 1.
+  \param [in]    x   first four 8-bit summands.
+  \param [in]    y   second four 8-bit summands.
+  \return        the four saturated sums, each in the byte lane its operands came from.
+  \remark
+                 res[7:0]   = sat8(x[7:0]   + y[7:0])        \n
+                 res[15:8]  = sat8(x[15:8]  + y[15:8])       \n
+                 res[23:16] = sat8(x[23:16] + y[23:16])      \n
+                 res[31:24] = sat8(x[31:24] + y[31:24])
+ */
+__ALWAYS_INLINE uint32_t __QADD8(uint32_t x, uint32_t y)
+{
+    /* Lane sums, computed on sign-extended bytes. */
+    const int32_t sum0 = (int32_t)(int8_t)x         + (int32_t)(int8_t)y;
+    const int32_t sum1 = (int32_t)(int8_t)(x >> 8)  + (int32_t)(int8_t)(y >> 8);
+    const int32_t sum2 = (int32_t)(int8_t)(x >> 16) + (int32_t)(int8_t)(y >> 16);
+    const int32_t sum3 = (int32_t)(int8_t)(x >> 24) + (int32_t)(int8_t)(y >> 24);
+
+    const int32_t b0 = __SSAT(sum0, 8) & (int32_t)0x000000FF;
+    const int32_t b1 = __SSAT(sum1, 8) & (int32_t)0x000000FF;
+    const int32_t b2 = __SSAT(sum2, 8) & (int32_t)0x000000FF;
+    const int32_t b3 = __SSAT(sum3, 8) & (int32_t)0x000000FF;
+
+    return ((uint32_t)((b3 << 24) | (b2 << 16) | (b1 << 8) | b0));
+}
+
+/**
+  \brief   Quad 8-bit unsigned saturating addition.
+  \details Adds the four unsigned byte lanes of x and y pairwise and passes each
+           sum through __IUSAT(..., 8), i.e. saturation to the 8-bit unsigned
+           range 0 <= x <= 2^8 - 1.
+  \param [in]    x   first four 8-bit summands.
+  \param [in]    y   second four 8-bit summands.
+  \return        the four saturated sums, each in the byte lane its operands came from.
+  \remark
+                 res[7:0]   = usat8(x[7:0]   + y[7:0])        \n
+                 res[15:8]  = usat8(x[15:8]  + y[15:8])       \n
+                 res[23:16] = usat8(x[23:16] + y[23:16])      \n
+                 res[31:24] = usat8(x[31:24] + y[31:24])
+ */
+__ALWAYS_INLINE uint32_t __UQADD8(uint32_t x, uint32_t y)
+{
+    /* Lane sums, computed on zero-extended bytes. */
+    const uint32_t sum0 = (x & 0xFFU) + (y & 0xFFU);
+    const uint32_t sum1 = ((x >> 8) & 0xFFU) + ((y >> 8) & 0xFFU);
+    const uint32_t sum2 = ((x >> 16) & 0xFFU) + ((y >> 16) & 0xFFU);
+    const uint32_t sum3 = (x >> 24) + (y >> 24);
+
+    const int32_t b0 = __IUSAT(sum0, 8) & 0x000000FF;
+    const int32_t b1 = __IUSAT(sum1, 8) & 0x000000FF;
+    const int32_t b2 = __IUSAT(sum2, 8) & 0x000000FF;
+    const int32_t b3 = __IUSAT(sum3, 8) & 0x000000FF;
+
+    return ((b3 << 24) | (b2 << 16) | (b1 << 8) | b0);
+}
+
+/**
+  \brief   Quad 8-bit signed addition.
+  \details Adds the four signed byte lanes of x and y pairwise. Each sum is
+           truncated to its low 8 bits; no saturation is applied.
+  \param [in]    x  first four 8-bit summands.
+  \param [in]    y  second four 8-bit summands.
+  \return        the four truncated sums, each in the byte lane its operands came from.
+  \remark
+                 res[7:0]   = x[7:0]   + y[7:0]        \n
+                 res[15:8]  = x[15:8]  + y[15:8]       \n
+                 res[23:16] = x[23:16] + y[23:16]      \n
+                 res[31:24] = x[31:24] + y[31:24]
+ */
+__ALWAYS_INLINE uint32_t __SADD8(uint32_t x, uint32_t y)
+{
+    const int32_t keep8 = (int32_t)0x000000FF;
+
+    const int32_t b0 = ((int32_t)(int8_t)x         + (int32_t)(int8_t)y)         & keep8;
+    const int32_t b1 = ((int32_t)(int8_t)(x >> 8)  + (int32_t)(int8_t)(y >> 8))  & keep8;
+    const int32_t b2 = ((int32_t)(int8_t)(x >> 16) + (int32_t)(int8_t)(y >> 16)) & keep8;
+    const int32_t b3 = ((int32_t)(int8_t)(x >> 24) + (int32_t)(int8_t)(y >> 24)) & keep8;
+
+    return ((uint32_t)((b3 << 24) | (b2 << 16) | (b1 << 8) | b0));
+}
+
+/**
+  \brief   Quad 8-bit unsigned addition.
+  \details Adds the four unsigned byte lanes of x and y pairwise. Each sum is
+           truncated to its low 8 bits; carries do not propagate between lanes.
+  \param [in]    x  first four 8-bit summands.
+  \param [in]    y  second four 8-bit summands.
+  \return        the four truncated sums, each in the byte lane its operands came from.
+  \remark
+                 res[7:0]   = x[7:0]   + y[7:0]        \n
+                 res[15:8]  = x[15:8]  + y[15:8]       \n
+                 res[23:16] = x[23:16] + y[23:16]      \n
+                 res[31:24] = x[31:24] + y[31:24]
+ */
+__ALWAYS_INLINE uint32_t __UADD8(uint32_t x, uint32_t y)
+{
+    const int32_t b0 = ((x & 0xFFU) + (y & 0xFFU)) & 0x000000FF;
+    const int32_t b1 = (((x >> 8) & 0xFFU) + ((y >> 8) & 0xFFU)) & 0x000000FF;
+    const int32_t b2 = (((x >> 16) & 0xFFU) + ((y >> 16) & 0xFFU)) & 0x000000FF;
+    const int32_t b3 = ((x >> 24) + (y >> 24)) & 0x000000FF;
+
+    return ((b3 << 24) | (b2 << 16) | (b1 << 8) | b0);
+}
+
+/**
+  \brief   Quad 8-bit saturating subtract.
+  \details Subtracts the four signed byte lanes of y from those of x and saturates
+           each difference with __SSAT(..., 8), i.e. to the 8-bit signed range
+           -2^7 <= x <= 2^7 - 1.
+  \param [in]    x   first four 8-bit operands (minuends).
+  \param [in]    y   second four 8-bit operands (subtrahends).
+  \return        the four saturated differences, each in the byte lane its operands came from.
+  \remark
+                 res[7:0]   = sat8(x[7:0]   - y[7:0])        \n
+                 res[15:8]  = sat8(x[15:8]  - y[15:8])       \n
+                 res[23:16] = sat8(x[23:16] - y[23:16])      \n
+                 res[31:24] = sat8(x[31:24] - y[31:24])
+ */
+__ALWAYS_INLINE uint32_t __QSUB8(uint32_t x, uint32_t y)
+{
+    /* Lane differences, computed on sign-extended bytes. */
+    const int32_t diff0 = (int32_t)(int8_t)x         - (int32_t)(int8_t)y;
+    const int32_t diff1 = (int32_t)(int8_t)(x >> 8)  - (int32_t)(int8_t)(y >> 8);
+    const int32_t diff2 = (int32_t)(int8_t)(x >> 16) - (int32_t)(int8_t)(y >> 16);
+    const int32_t diff3 = (int32_t)(int8_t)(x >> 24) - (int32_t)(int8_t)(y >> 24);
+
+    const int32_t b0 = __SSAT(diff0, 8) & (int32_t)0x000000FF;
+    const int32_t b1 = __SSAT(diff1, 8) & (int32_t)0x000000FF;
+    const int32_t b2 = __SSAT(diff2, 8) & (int32_t)0x000000FF;
+    const int32_t b3 = __SSAT(diff3, 8) & (int32_t)0x000000FF;
+
+    return ((uint32_t)((b3 << 24) | (b2 << 16) | (b1 << 8) | b0));
+}
+
+/**
+  \brief   Quad 8-bit unsigned saturating subtraction.
+  \details Subtracts the four unsigned byte lanes of y from those of x. Each
+           difference is computed in unsigned 32-bit arithmetic and passed
+           through __IUSAT(..., 8), i.e. saturation to the 8-bit unsigned
+           range 0 <= x <= 2^8 - 1.
+  \param [in]    x   first four 8-bit operands (minuends).
+  \param [in]    y   second four 8-bit operands (subtrahends).
+  \return        the four saturated differences, each in the byte lane its operands came from.
+  \remark
+                 res[7:0]   = usat8(x[7:0]   - y[7:0])        \n
+                 res[15:8]  = usat8(x[15:8]  - y[15:8])       \n
+                 res[23:16] = usat8(x[23:16] - y[23:16])      \n
+                 res[31:24] = usat8(x[31:24] - y[31:24])
+ */
+__ALWAYS_INLINE uint32_t __UQSUB8(uint32_t x, uint32_t y)
+{
+    /* Lane differences on zero-extended bytes; they wrap when y's byte is larger. */
+    const uint32_t diff0 = (x & 0xFFU) - (y & 0xFFU);
+    const uint32_t diff1 = ((x >> 8) & 0xFFU) - ((y >> 8) & 0xFFU);
+    const uint32_t diff2 = ((x >> 16) & 0xFFU) - ((y >> 16) & 0xFFU);
+    const uint32_t diff3 = (x >> 24) - (y >> 24);
+
+    const int32_t b0 = __IUSAT(diff0, 8) & 0x000000FF;
+    const int32_t b1 = __IUSAT(diff1, 8) & 0x000000FF;
+    const int32_t b2 = __IUSAT(diff2, 8) & 0x000000FF;
+    const int32_t b3 = __IUSAT(diff3, 8) & 0x000000FF;
+
+    return ((b3 << 24) | (b2 << 16) | (b1 << 8) | b0);
+}
+
+/**
+  \brief   Quad 8-bit signed subtraction.
+  \details Subtracts the four signed byte lanes of y from those of x. Each
+           difference is truncated to its low 8 bits; no saturation is applied.
+  \param [in]    x  first four 8-bit operands of each subtraction.
+  \param [in]    y  second four 8-bit operands of each subtraction.
+  \return        the four truncated differences, each in the byte lane its operands came from.
+  \remark
+                 res[7:0]   = x[7:0]   - y[7:0]        \n
+                 res[15:8]  = x[15:8]  - y[15:8]       \n
+                 res[23:16] = x[23:16] - y[23:16]      \n
+                 res[31:24] = x[31:24] - y[31:24]
+ */
+__ALWAYS_INLINE uint32_t __SSUB8(uint32_t x, uint32_t y)
+{
+    const int32_t keep8 = (int32_t)0x000000FF;
+
+    const int32_t b0 = ((int32_t)(int8_t)x         - (int32_t)(int8_t)y)         & keep8;
+    const int32_t b1 = ((int32_t)(int8_t)(x >> 8)  - (int32_t)(int8_t)(y >> 8))  & keep8;
+    const int32_t b2 = ((int32_t)(int8_t)(x >> 16) - (int32_t)(int8_t)(y >> 16)) & keep8;
+    const int32_t b3 = ((int32_t)(int8_t)(x >> 24) - (int32_t)(int8_t)(y >> 24)) & keep8;
+
+    return ((uint32_t)((b3 << 24) | (b2 << 16) | (b1 << 8) | b0));
+}
+
+/**
+  \brief   Quad 8-bit unsigned subtract.
+  \details Subtracts the four unsigned byte lanes of y from those of x. Each
+           difference is truncated to its low 8 bits; borrows do not propagate
+           between lanes.
+  \param [in]    x  first four 8-bit operands of each subtraction.
+  \param [in]    y  second four 8-bit operands of each subtraction.
+  \return        the four truncated differences, each in the byte lane its operands came from.
+  \remark
+                 res[7:0]   = x[7:0]   - y[7:0]        \n
+                 res[15:8]  = x[15:8]  - y[15:8]       \n
+                 res[23:16] = x[23:16] - y[23:16]      \n
+                 res[31:24] = x[31:24] - y[31:24]
+ */
+__ALWAYS_INLINE uint32_t __USUB8(uint32_t x, uint32_t y)
+{
+    const int32_t b0 = ((x & 0xFFU) - (y & 0xFFU)) & 0x000000FF;
+    const int32_t b1 = (((x >> 8) & 0xFFU) - ((y >> 8) & 0xFFU)) & 0x000000FF;
+    const int32_t b2 = (((x >> 16) & 0xFFU) - ((y >> 16) & 0xFFU)) & 0x000000FF;
+    const int32_t b3 = ((x >> 24) - (y >> 24)) & 0x000000FF;
+
+    return ((b3 << 24) | (b2 << 16) | (b1 << 8) | b0);
+}
+
+/**
+  \brief   Unsigned sum of quad 8-bit unsigned absolute difference.
+  \details Takes the four unsigned byte lanes of x and y, forms the absolute
+           value of each pairwise difference, and returns the sum of the four
+           absolute differences as a single unsigned integer (range 0..1020).
+  \param [in]    x  first four 8-bit operands of each subtraction.
+  \param [in]    y  second four 8-bit operands of each subtraction.
+  \return        the sum of the four absolute byte differences.
+  \remark
+                 absdiff1   = |x[7:0]   - y[7:0]|        \n
+                 absdiff2   = |x[15:8]  - y[15:8]|       \n
+                 absdiff3   = |x[23:16] - y[23:16]|      \n
+                 absdiff4   = |x[31:24] - y[31:24]|      \n
+                 res[31:0]  = absdiff1 + absdiff2 + absdiff3 + absdiff4
+ */
+__ALWAYS_INLINE uint32_t __USAD8(uint32_t x, uint32_t y)
+{
+    uint32_t total = 0U;
+    uint32_t shift;
+
+    for (shift = 0U; shift < 32U; shift += 8U) {
+        const uint32_t a = (x >> shift) & 0xFFU;
+        const uint32_t b = (y >> shift) & 0xFFU;
+
+        /* Subtract the smaller from the larger so the lane result is the
+           true absolute difference rather than a wrapped 8-bit value. */
+        total += (a > b) ? (a - b) : (b - a);
+    }
+
+    return (total);
+}
+
+/**
+  \brief   Unsigned sum of quad 8-bit unsigned absolute difference with 32-bit accumulate.
+  \details Takes the four unsigned byte lanes of x and y, forms the absolute
+           value of each pairwise difference, and adds the four absolute
+           differences to the accumulation value. The absolute difference is
+           computed directly on unsigned values, so no library abs() is needed.
+  \param [in]    x  first four 8-bit operands of each subtraction.
+  \param [in]    y  second four 8-bit operands of each subtraction.
+  \param [in]  sum  accumulation value.
+  \return        the sum of the four absolute byte differences, added to the accumulation value.
+  \remark
+                 absdiff1 = |x[7:0]   - y[7:0]|        \n
+                 absdiff2 = |x[15:8]  - y[15:8]|       \n
+                 absdiff3 = |x[23:16] - y[23:16]|      \n
+                 absdiff4 = |x[31:24] - y[31:24]|      \n
+                 res[31:0] = absdiff1 + absdiff2 + absdiff3 + absdiff4 + sum[31:0]
+ */
+__ALWAYS_INLINE uint32_t __USADA8(uint32_t x, uint32_t y, uint32_t sum)
+{
+    uint32_t total = sum;
+    uint32_t shift;
+
+    for (shift = 0U; shift < 32U; shift += 8U) {
+        const uint32_t a = (x >> shift) & 0xFFU;
+        const uint32_t b = (y >> shift) & 0xFFU;
+
+        /* Larger minus smaller: the absolute difference of the two bytes. */
+        total += (a > b) ? (a - b) : (b - a);
+    }
+
+    return (total);
+}
+
+/**
+  \brief   Dual 16-bit saturating addition.
+  \details Adds the two signed halfword lanes of x and y pairwise and saturates
+           each sum with __SSAT(..., 16), i.e. to the 16-bit signed range
+           -2^15 <= x <= 2^15 - 1.
+  \param [in]    x   first two 16-bit summands.
+  \param [in]    y   second two 16-bit summands.
+  \return        the saturated addition of the low halfwords, in the low halfword of the return value.\n
+                 the saturated addition of the high halfwords, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = sat16(x[15:0]  + y[15:0])        \n
+                 res[31:16] = sat16(x[31:16] + y[31:16])
+ */
+__ALWAYS_INLINE uint32_t __QADD16(uint32_t x, uint32_t y)
+{
+    /* Lane sums, computed on sign-extended halfwords. */
+    const int32_t low_sum  = (int32_t)(int16_t)x         + (int32_t)(int16_t)y;
+    const int32_t high_sum = (int32_t)(int16_t)(x >> 16) + (int32_t)(int16_t)(y >> 16);
+
+    const int32_t low_sat  = __SSAT(low_sum, 16) & (int32_t)0x0000FFFF;
+    const int32_t high_sat = __SSAT(high_sum, 16) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((high_sat << 16) | low_sat));
+}
+
+/**
+  \brief   Dual 16-bit unsigned saturating addition.
+  \details Adds the two unsigned halfword lanes of x and y pairwise and passes
+           each sum through __IUSAT(..., 16), i.e. saturation to the 16-bit
+           unsigned range 0 <= x <= 2^16 - 1.
+  \param [in]    x   first two 16-bit summands.
+  \param [in]    y   second two 16-bit summands.
+  \return        the saturated addition of the low halfwords, in the low halfword of the return value.\n
+                 the saturated addition of the high halfwords, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = usat16(x[15:0]  + y[15:0])        \n
+                 res[31:16] = usat16(x[31:16] + y[31:16])
+ */
+__ALWAYS_INLINE uint32_t __UQADD16(uint32_t x, uint32_t y)
+{
+    /* Lane sums, computed on zero-extended halfwords. */
+    const uint32_t low_sum  = (x & 0xFFFFU) + (y & 0xFFFFU);
+    const uint32_t high_sum = (x >> 16) + (y >> 16);
+
+    const int32_t low_sat  = __IUSAT(low_sum, 16) & 0x0000FFFF;
+    const int32_t high_sat = __IUSAT(high_sum, 16) & 0x0000FFFF;
+
+    return ((high_sat << 16) | low_sat);
+}
+
+/**
+  \brief   Dual 16-bit signed addition.
+  \details Adds the two signed halfword lanes of x and y pairwise. Each sum is
+           truncated to its low 16 bits; no saturation is applied.
+  \param [in]    x   first two 16-bit summands.
+  \param [in]    y   second two 16-bit summands.
+  \return        the addition of the low halfwords in the low halfword of the return value.\n
+                 the addition of the high halfwords in the high halfword of the return value.
+  \remark
+                 res[15:0]  = x[15:0]  + y[15:0]        \n
+                 res[31:16] = x[31:16] + y[31:16]
+ */
+__ALWAYS_INLINE uint32_t __SADD16(uint32_t x, uint32_t y)
+{
+    const int32_t keep16 = (int32_t)0x0000FFFF;
+
+    const int32_t low  = ((int32_t)(int16_t)x         + (int32_t)(int16_t)y)         & keep16;
+    const int32_t high = ((int32_t)(int16_t)(x >> 16) + (int32_t)(int16_t)(y >> 16)) & keep16;
+
+    return ((uint32_t)((high << 16) | low));
+}
+
+/**
+  \brief   Dual 16-bit unsigned addition
+  \details This function enables you to perform two 16-bit unsigned integer additions.
+  \param [in]    x   first two 16-bit summands for each addition.
+  \param [in]    y   second two 16-bit summands for each addition.
+  \return        the addition of the low halfwords in the low halfword of the return value.\n
+                 the addition of the high halfwords in the high halfword of the return value.
+  \remark
+                 res[15:0]  = val1[15:0]  + val2[15:0]        \n
+                 res[31:16] = val1[31:16] + val2[31:16]
+ */
+__ALWAYS_INLINE uint32_t __UADD16(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final hi << 16 is an unsigned
+       shift; as an int32_t shift it would overflow for hi >= 0x8000. */
+    uint32_t x_lo = x & 0x0000FFFFu;
+    uint32_t x_hi = x >> 16;
+    uint32_t y_lo = y & 0x0000FFFFu;
+    uint32_t y_hi = y >> 16;
+
+    /* Each sum wraps to its lane: the carry out of bit 15 is discarded. */
+    uint32_t lo = (x_lo + y_lo) & 0x0000FFFFu;
+    uint32_t hi = (x_hi + y_hi) & 0x0000FFFFu;
+
+    return ((hi << 16) | lo);
+}
+
+
+/**
+  \brief   Dual 16-bit signed addition with halved results.
+  \details This function enables you to perform two signed 16-bit integer additions, halving the results.
+  \param [in]    x   first two 16-bit summands.
+  \param [in]    y   second two 16-bit summands.
+  \return        the halved addition of the low halfwords, in the low halfword of the return value.\n
+                 the halved addition of the high halfwords, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = (val1[15:0]  + val2[15:0]) >> 1        \n
+                 res[31:16] = (val1[31:16] + val2[31:16]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __SHADD16(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each 16-bit lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x_lo = (int32_t)(int16_t)x;
+    int32_t x_hi = (int32_t)(int16_t)(x >> 16);
+    int32_t y_lo = (int32_t)(int16_t)y;
+    int32_t y_hi = (int32_t)(int16_t)(y >> 16);
+
+    /* Halve the 17-bit signed sum; as before, this relies on the compiler
+       shifting negative values arithmetically. */
+    uint32_t lo = (uint32_t)((x_lo + y_lo) >> 1) & 0x0000FFFFu;
+    uint32_t hi = (uint32_t)((x_hi + y_hi) >> 1) & 0x0000FFFFu;
+
+    /* Pack as unsigned so that hi << 16 cannot overflow a signed type. */
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit unsigned addition with halved results.
+  \details This function enables you to perform two unsigned 16-bit integer additions, halving the results.
+  \param [in]    x   first two 16-bit summands.
+  \param [in]    y   second two 16-bit summands.
+  \return        the halved addition of the low halfwords, in the low halfword of the return value.\n
+                 the halved addition of the high halfwords, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = (val1[15:0]  + val2[15:0]) >> 1        \n
+                 res[31:16] = (val1[31:16] + val2[31:16]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __UHADD16(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final hi << 16 is an unsigned
+       shift; as an int32_t shift it would overflow for hi >= 0x8000. */
+    uint32_t x_lo = x & 0x0000FFFFu;
+    uint32_t x_hi = x >> 16;
+    uint32_t y_lo = y & 0x0000FFFFu;
+    uint32_t y_hi = y >> 16;
+
+    /* The 17-bit sum is halved before masking, so the carry is preserved. */
+    uint32_t lo = ((x_lo + y_lo) >> 1) & 0x0000FFFFu;
+    uint32_t hi = ((x_hi + y_hi) >> 1) & 0x0000FFFFu;
+
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Quad 8-bit signed addition with halved results.
+  \details This function enables you to perform four signed 8-bit integer additions, halving the results.
+  \param [in]    x   first four 8-bit summands.
+  \param [in]    y   second four 8-bit summands.
+  \return        the halved addition of the first bytes from each operand, in the first byte of the return value.\n
+                 the halved addition of the second bytes from each operand, in the second byte of the return value.\n
+                 the halved addition of the third bytes from each operand, in the third byte of the return value.\n
+                 the halved addition of the fourth bytes from each operand, in the fourth byte of the return value.
+  \remark
+                 res[7:0]   = (val1[7:0]   + val2[7:0]  ) >> 1    \n
+                 res[15:8]  = (val1[15:8]  + val2[15:8] ) >> 1    \n
+                 res[23:16] = (val1[23:16] + val2[23:16]) >> 1    \n
+                 res[31:24] = (val1[31:24] + val2[31:24]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __SHADD8(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each byte lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x0 = (int32_t)(int8_t)x;
+    int32_t x1 = (int32_t)(int8_t)(x >> 8);
+    int32_t x2 = (int32_t)(int8_t)(x >> 16);
+    int32_t x3 = (int32_t)(int8_t)(x >> 24);
+    int32_t y0 = (int32_t)(int8_t)y;
+    int32_t y1 = (int32_t)(int8_t)(y >> 8);
+    int32_t y2 = (int32_t)(int8_t)(y >> 16);
+    int32_t y3 = (int32_t)(int8_t)(y >> 24);
+
+    /* Halve each 9-bit signed sum; as before, this relies on the compiler
+       shifting negative values arithmetically. */
+    uint32_t b0 = (uint32_t)((x0 + y0) >> 1) & 0x000000FFu;
+    uint32_t b1 = (uint32_t)((x1 + y1) >> 1) & 0x000000FFu;
+    uint32_t b2 = (uint32_t)((x2 + y2) >> 1) & 0x000000FFu;
+    uint32_t b3 = (uint32_t)((x3 + y3) >> 1) & 0x000000FFu;
+
+    /* Pack as unsigned so that b3 << 24 cannot overflow a signed type. */
+    return ((b3 << 24) | (b2 << 16) | (b1 << 8) | b0);
+}
+
+/**
+  \brief   Quad 8-bit unsigned addition with halved results.
+  \details This function enables you to perform four unsigned 8-bit integer additions, halving the results.
+  \param [in]    x   first four 8-bit summands.
+  \param [in]    y   second four 8-bit summands.
+  \return        the halved addition of the first bytes from each operand, in the first byte of the return value.\n
+                 the halved addition of the second bytes from each operand, in the second byte of the return value.\n
+                 the halved addition of the third bytes from each operand, in the third byte of the return value.\n
+                 the halved addition of the fourth bytes from each operand, in the fourth byte of the return value.
+  \remark
+                 res[7:0]   = (val1[7:0]   + val2[7:0]  ) >> 1    \n
+                 res[15:8]  = (val1[15:8]  + val2[15:8] ) >> 1    \n
+                 res[23:16] = (val1[23:16] + val2[23:16]) >> 1    \n
+                 res[31:24] = (val1[31:24] + val2[31:24]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __UHADD8(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final b3 << 24 is an unsigned
+       shift; as an int32_t shift it would overflow for b3 >= 0x80. */
+    uint32_t x0 = x & 0x000000FFu;
+    uint32_t x1 = (x >> 8) & 0x000000FFu;
+    uint32_t x2 = (x >> 16) & 0x000000FFu;
+    uint32_t x3 = x >> 24;
+    uint32_t y0 = y & 0x000000FFu;
+    uint32_t y1 = (y >> 8) & 0x000000FFu;
+    uint32_t y2 = (y >> 16) & 0x000000FFu;
+    uint32_t y3 = y >> 24;
+
+    /* The 9-bit sum is halved before masking, so the carry is preserved. */
+    uint32_t b0 = ((x0 + y0) >> 1) & 0x000000FFu;
+    uint32_t b1 = ((x1 + y1) >> 1) & 0x000000FFu;
+    uint32_t b2 = ((x2 + y2) >> 1) & 0x000000FFu;
+    uint32_t b3 = ((x3 + y3) >> 1) & 0x000000FFu;
+
+    return ((b3 << 24) | (b2 << 16) | (b1 << 8) | b0);
+}
+
+/**
+  \brief   Dual 16-bit saturating subtract.
+  \details This function enables you to perform two 16-bit integer subtractions in parallel,
+           saturating the results to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
+  \param [in]    x   first two 16-bit operands for each subtraction.
+  \param [in]    y   second two 16-bit operands for each subtraction.
+  \return        the saturated subtraction of the low halfwords, in the low halfword of the return value.\n
+                 the saturated subtraction of the high halfwords, in the high halfword of the return value.\n
+                 The returned results are saturated to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
+  \remark
+                 res[15:0]  = val1[15:0]  - val2[15:0]        \n
+                 res[31:16] = val1[31:16] - val2[31:16]
+ */
+__ALWAYS_INLINE uint32_t __QSUB16(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each 16-bit lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x_lo = (int32_t)(int16_t)x;
+    int32_t x_hi = (int32_t)(int16_t)(x >> 16);
+    int32_t y_lo = (int32_t)(int16_t)y;
+    int32_t y_hi = (int32_t)(int16_t)(y >> 16);
+
+    /* Clamp each difference through __SSAT(..., 16), keep the low 16 bits. */
+    uint32_t lo = (uint32_t)(__SSAT((x_lo - y_lo), 16) & (int32_t)0x0000FFFF);
+    uint32_t hi = (uint32_t)(__SSAT((x_hi - y_hi), 16) & (int32_t)0x0000FFFF);
+
+    /* Pack as unsigned so that hi << 16 cannot overflow a signed type. */
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit unsigned saturating subtraction.
+  \details This function enables you to perform two unsigned 16-bit integer subtractions,
+           saturating the results to the 16-bit unsigned integer range 0 <= x <= 2^16 - 1.
+  \param [in]    x   first two 16-bit operands for each subtraction.
+  \param [in]    y   second two 16-bit operands for each subtraction.
+  \return        the saturated subtraction of the low halfwords, in the low halfword of the return value.\n
+                 the saturated subtraction of the high halfwords, in the high halfword of the return value.\n
+                 The returned results are saturated to the 16-bit unsigned integer range 0 <= x <= 2^16 - 1.
+  \remark
+                 res[15:0]  = val1[15:0]  - val2[15:0]        \n
+                 res[31:16] = val1[31:16] - val2[31:16]
+ */
+__ALWAYS_INLINE uint32_t __UQSUB16(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final hi << 16 is an unsigned
+       shift; as an int32_t shift it would overflow for hi >= 0x8000. */
+    uint32_t x_lo = x & 0x0000FFFFu;
+    uint32_t x_hi = x >> 16;
+    uint32_t y_lo = y & 0x0000FFFFu;
+    uint32_t y_hi = y >> 16;
+
+    /* The difference wraps modulo 2^32 when negative; __IUSAT(..., 16) is
+       handed that wrapped value, exactly as in the original expression. */
+    uint32_t lo = __IUSAT((x_lo - y_lo), 16) & 0x0000FFFF;
+    uint32_t hi = __IUSAT((x_hi - y_hi), 16) & 0x0000FFFF;
+
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit signed subtraction.
+  \details This function enables you to perform two 16-bit signed integer subtractions.
+  \param [in]    x   first two 16-bit operands of each subtraction.
+  \param [in]    y   second two 16-bit operands of each subtraction.
+  \return        the subtraction of the low halfword in the second operand from the low
+                 halfword in the first operand, in the low halfword of the return value. \n
+                 the subtraction of the high halfword in the second operand from the high
+                 halfword in the first operand, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = val1[15:0]  - val2[15:0]        \n
+                 res[31:16] = val1[31:16] - val2[31:16]
+ */
+__ALWAYS_INLINE uint32_t __SSUB16(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each 16-bit lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x_lo = (int32_t)(int16_t)x;
+    int32_t x_hi = (int32_t)(int16_t)(x >> 16);
+    int32_t y_lo = (int32_t)(int16_t)y;
+    int32_t y_hi = (int32_t)(int16_t)(y >> 16);
+
+    /* Each difference wraps to its lane: only the low 16 bits are kept. */
+    uint32_t lo = (uint32_t)(x_lo - y_lo) & 0x0000FFFFu;
+    uint32_t hi = (uint32_t)(x_hi - y_hi) & 0x0000FFFFu;
+
+    /* Pack as unsigned so that hi << 16 cannot overflow a signed type. */
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit unsigned subtract.
+  \details This function enables you to perform two 16-bit unsigned integer subtractions.
+  \param [in]    x   first two 16-bit operands of each subtraction.
+  \param [in]    y   second two 16-bit operands of each subtraction.
+  \return        the subtraction of the low halfword in the second operand from the low
+                 halfword in the first operand, in the low halfword of the return value. \n
+                 the subtraction of the high halfword in the second operand from the high
+                 halfword in the first operand, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = val1[15:0]  - val2[15:0]        \n
+                 res[31:16] = val1[31:16] - val2[31:16]
+ */
+__ALWAYS_INLINE uint32_t __USUB16(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final hi << 16 is an unsigned
+       shift; as an int32_t shift it would overflow for hi >= 0x8000. */
+    uint32_t x_lo = x & 0x0000FFFFu;
+    uint32_t x_hi = x >> 16;
+    uint32_t y_lo = y & 0x0000FFFFu;
+    uint32_t y_hi = y >> 16;
+
+    /* Each difference wraps to its lane: only the low 16 bits are kept. */
+    uint32_t lo = (x_lo - y_lo) & 0x0000FFFFu;
+    uint32_t hi = (x_hi - y_hi) & 0x0000FFFFu;
+
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit signed subtraction with halved results.
+  \details This function enables you to perform two signed 16-bit integer subtractions, halving the results.
+  \param [in]    x   first two 16-bit operands for each subtraction.
+  \param [in]    y   second two 16-bit operands for each subtraction.
+  \return        the halved subtraction of the low halfwords, in the low halfword of the return value.\n
+                 the halved subtraction of the high halfwords, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = (val1[15:0]  - val2[15:0]) >> 1        \n
+                 res[31:16] = (val1[31:16] - val2[31:16]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __SHSUB16(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each 16-bit lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x_lo = (int32_t)(int16_t)x;
+    int32_t x_hi = (int32_t)(int16_t)(x >> 16);
+    int32_t y_lo = (int32_t)(int16_t)y;
+    int32_t y_hi = (int32_t)(int16_t)(y >> 16);
+
+    /* Halve the 17-bit signed difference; as before, this relies on the
+       compiler shifting negative values arithmetically. */
+    uint32_t lo = (uint32_t)((x_lo - y_lo) >> 1) & 0x0000FFFFu;
+    uint32_t hi = (uint32_t)((x_hi - y_hi) >> 1) & 0x0000FFFFu;
+
+    /* Pack as unsigned so that hi << 16 cannot overflow a signed type. */
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit unsigned subtraction with halved results.
+  \details This function enables you to perform two unsigned 16-bit integer subtractions, halving the results.
+  \param [in]    x   first two 16-bit operands for each subtraction.
+  \param [in]    y   second two 16-bit operands for each subtraction.
+  \return        the halved subtraction of the low halfwords, in the low halfword of the return value.\n
+                 the halved subtraction of the high halfwords, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = (val1[15:0]  - val2[15:0]) >> 1        \n
+                 res[31:16] = (val1[31:16] - val2[31:16]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __UHSUB16(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final hi << 16 is an unsigned
+       shift; as an int32_t shift it would overflow for hi >= 0x8000. */
+    uint32_t x_lo = x & 0x0000FFFFu;
+    uint32_t x_hi = x >> 16;
+    uint32_t y_lo = y & 0x0000FFFFu;
+    uint32_t y_hi = y >> 16;
+
+    /* The difference wraps modulo 2^32; shifting right by one and masking
+       selects bits [16:1] of that wrapped difference. */
+    uint32_t lo = ((x_lo - y_lo) >> 1) & 0x0000FFFFu;
+    uint32_t hi = ((x_hi - y_hi) >> 1) & 0x0000FFFFu;
+
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Quad 8-bit signed subtraction with halved results.
+  \details This function enables you to perform four signed 8-bit integer subtractions, halving the results.
+  \param [in]    x   first four 8-bit operands for each subtraction.
+  \param [in]    y   second four 8-bit operands for each subtraction.
+  \return        the halved subtraction of the first bytes from each operand, in the first byte of the return value.\n
+                 the halved subtraction of the second bytes from each operand, in the second byte of the return value.\n
+                 the halved subtraction of the third bytes from each operand, in the third byte of the return value.\n
+                 the halved subtraction of the fourth bytes from each operand, in the fourth byte of the return value.
+  \remark
+                 res[7:0]   = (val1[7:0]   - val2[7:0]  ) >> 1    \n
+                 res[15:8]  = (val1[15:8]  - val2[15:8] ) >> 1    \n
+                 res[23:16] = (val1[23:16] - val2[23:16]) >> 1    \n
+                 res[31:24] = (val1[31:24] - val2[31:24]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __SHSUB8(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each byte lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x0 = (int32_t)(int8_t)x;
+    int32_t x1 = (int32_t)(int8_t)(x >> 8);
+    int32_t x2 = (int32_t)(int8_t)(x >> 16);
+    int32_t x3 = (int32_t)(int8_t)(x >> 24);
+    int32_t y0 = (int32_t)(int8_t)y;
+    int32_t y1 = (int32_t)(int8_t)(y >> 8);
+    int32_t y2 = (int32_t)(int8_t)(y >> 16);
+    int32_t y3 = (int32_t)(int8_t)(y >> 24);
+
+    /* Halve each 9-bit signed difference; as before, this relies on the
+       compiler shifting negative values arithmetically. */
+    uint32_t b0 = (uint32_t)((x0 - y0) >> 1) & 0x000000FFu;
+    uint32_t b1 = (uint32_t)((x1 - y1) >> 1) & 0x000000FFu;
+    uint32_t b2 = (uint32_t)((x2 - y2) >> 1) & 0x000000FFu;
+    uint32_t b3 = (uint32_t)((x3 - y3) >> 1) & 0x000000FFu;
+
+    /* Pack as unsigned so that b3 << 24 cannot overflow a signed type. */
+    return ((b3 << 24) | (b2 << 16) | (b1 << 8) | b0);
+}
+
+/**
+  \brief   Quad 8-bit unsigned subtraction with halved results.
+  \details This function enables you to perform four unsigned 8-bit integer subtractions, halving the results.
+  \param [in]    x   first four 8-bit operands for each subtraction.
+  \param [in]    y   second four 8-bit operands for each subtraction.
+  \return        the halved subtraction of the first bytes from each operand, in the first byte of the return value.\n
+                 the halved subtraction of the second bytes from each operand, in the second byte of the return value.\n
+                 the halved subtraction of the third bytes from each operand, in the third byte of the return value.\n
+                 the halved subtraction of the fourth bytes from each operand, in the fourth byte of the return value.
+  \remark
+                 res[7:0]   = (val1[7:0]   - val2[7:0]  ) >> 1    \n
+                 res[15:8]  = (val1[15:8]  - val2[15:8] ) >> 1    \n
+                 res[23:16] = (val1[23:16] - val2[23:16]) >> 1    \n
+                 res[31:24] = (val1[31:24] - val2[31:24]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __UHSUB8(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final b3 << 24 is an unsigned
+       shift; as an int32_t shift it would overflow for b3 >= 0x80. */
+    uint32_t x0 = x & 0x000000FFu;
+    uint32_t x1 = (x >> 8) & 0x000000FFu;
+    uint32_t x2 = (x >> 16) & 0x000000FFu;
+    uint32_t x3 = x >> 24;
+    uint32_t y0 = y & 0x000000FFu;
+    uint32_t y1 = (y >> 8) & 0x000000FFu;
+    uint32_t y2 = (y >> 16) & 0x000000FFu;
+    uint32_t y3 = y >> 24;
+
+    /* The difference wraps modulo 2^32; shifting right by one and masking
+       selects bits [8:1] of that wrapped difference. */
+    uint32_t b0 = ((x0 - y0) >> 1) & 0x000000FFu;
+    uint32_t b1 = ((x1 - y1) >> 1) & 0x000000FFu;
+    uint32_t b2 = ((x2 - y2) >> 1) & 0x000000FFu;
+    uint32_t b3 = ((x3 - y3) >> 1) & 0x000000FFu;
+
+    return ((b3 << 24) | (b2 << 16) | (b1 << 8) | b0);
+}
+
+/**
+  \brief   Dual 16-bit add and subtract with exchange.
+  \details This function enables you to exchange the halfwords of the second operand,
+           then add the high halfwords and subtract the low halfwords,
+           saturating the results to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
+  \param [in]    x   first operand for the subtraction in the low halfword,
+                     and the first operand for the addition in the high halfword.
+  \param [in]    y   second operand for the subtraction in the high halfword,
+                     and the second operand for the addition in the low halfword.
+  \return        the saturated subtraction of the high halfword in the second operand from the
+                 low halfword in the first operand, in the low halfword of the return value.\n
+                 the saturated addition of the high halfword in the first operand and the
+                 low halfword in the second operand, in the high halfword of the return value.\n
+                 The returned results are saturated to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
+  \remark
+                 res[15:0]  = val1[15:0]  - val2[31:16]        \n
+                 res[31:16] = val1[31:16] + val2[15:0]
+ */
+__ALWAYS_INLINE uint32_t __QASX(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each 16-bit lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x_lo = (int32_t)(int16_t)x;
+    int32_t x_hi = (int32_t)(int16_t)(x >> 16);
+    int32_t y_lo = (int32_t)(int16_t)y;
+    int32_t y_hi = (int32_t)(int16_t)(y >> 16);
+
+    /* Exchange: low result = x.lo - y.hi, high result = x.hi + y.lo,
+       each clamped through __SSAT(..., 16). */
+    uint32_t lo = (uint32_t)(__SSAT((x_lo - y_hi), 16) & (int32_t)0x0000FFFF);
+    uint32_t hi = (uint32_t)(__SSAT((x_hi + y_lo), 16) & (int32_t)0x0000FFFF);
+
+    /* Pack as unsigned so that hi << 16 cannot overflow a signed type. */
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit unsigned saturating addition and subtraction with exchange.
+  \details This function enables you to exchange the halfwords of the second operand and
+           perform one unsigned 16-bit integer addition and one unsigned 16-bit subtraction,
+           saturating the results to the 16-bit unsigned integer range 0 <= x <= 2^16 - 1.
+  \param [in]    x   first operand for the subtraction in the low halfword,
+                     and the first operand for the addition in the high halfword.
+  \param [in]    y   second operand for the subtraction in the high halfword,
+                     and the second operand for the addition in the low halfword.
+  \return        the saturated subtraction of the high halfword in the second operand from the
+                 low halfword in the first operand, in the low halfword of the return value.\n
+                 the saturated addition of the high halfword in the first operand and the
+                 low halfword in the second operand, in the high halfword of the return value.\n
+                 The returned results are saturated to the 16-bit unsigned integer range 0 <= x <= 2^16 - 1.
+  \remark
+                 res[15:0]  = val1[15:0]  - val2[31:16]        \n
+                 res[31:16] = val1[31:16] + val2[15:0]
+ */
+__ALWAYS_INLINE uint32_t __UQASX(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final hi << 16 is an unsigned
+       shift; as an int32_t shift it would overflow for hi >= 0x8000. */
+    uint32_t x_lo = x & 0x0000FFFFu;
+    uint32_t x_hi = x >> 16;
+    uint32_t y_lo = y & 0x0000FFFFu;
+    uint32_t y_hi = y >> 16;
+
+    /* Exchange: low result = x.lo - y.hi, high result = x.hi + y.lo,
+       each clamped through __IUSAT(..., 16). */
+    uint32_t lo = __IUSAT((x_lo - y_hi), 16) & 0x0000FFFF;
+    uint32_t hi = __IUSAT((x_hi + y_lo), 16) & 0x0000FFFF;
+
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit addition and subtraction with exchange.
+  \details It enables you to exchange the halfwords of the second operand, add the high halfwords
+           and subtract the low halfwords.
+  \param [in]    x   first operand for the subtraction in the low halfword,
+                     and the first operand for the addition in the high halfword.
+  \param [in]    y   second operand for the subtraction in the high halfword,
+                     and the second operand for the addition in the low halfword.
+  \return        the subtraction of the high halfword in the second operand from the
+                 low halfword in the first operand, in the low halfword of the return value.\n
+                 the addition of the high halfword in the first operand and the
+                 low halfword in the second operand, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = val1[15:0]  - val2[31:16]        \n
+                 res[31:16] = val1[31:16] + val2[15:0]
+ */
+__ALWAYS_INLINE uint32_t __SASX(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each 16-bit lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x_lo = (int32_t)(int16_t)x;
+    int32_t x_hi = (int32_t)(int16_t)(x >> 16);
+    int32_t y_lo = (int32_t)(int16_t)y;
+    int32_t y_hi = (int32_t)(int16_t)(y >> 16);
+
+    /* Exchange: low result = x.lo - y.hi, high result = x.hi + y.lo. */
+    uint32_t lo = (uint32_t)(x_lo - y_hi) & 0x0000FFFFu;
+    uint32_t hi = (uint32_t)(x_hi + y_lo) & 0x0000FFFFu;
+
+    /* Pack as unsigned so that hi << 16 cannot overflow a signed type. */
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit unsigned addition and subtraction with exchange.
+  \details This function enables you to exchange the two halfwords of the second operand,
+           add the high halfwords and subtract the low halfwords.
+  \param [in]    x   first operand for the subtraction in the low halfword,
+                     and the first operand for the addition in the high halfword.
+  \param [in]    y   second operand for the subtraction in the high halfword,
+                     and the second operand for the addition in the low halfword.
+  \return        the subtraction of the high halfword in the second operand from the
+                 low halfword in the first operand, in the low halfword of the return value.\n
+                 the addition of the high halfword in the first operand and the
+                 low halfword in the second operand, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = val1[15:0]  - val2[31:16]        \n
+                 res[31:16] = val1[31:16] + val2[15:0]
+ */
+__ALWAYS_INLINE uint32_t __UASX(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final hi << 16 is an unsigned
+       shift; as an int32_t shift it would overflow for hi >= 0x8000. */
+    uint32_t x_lo = x & 0x0000FFFFu;
+    uint32_t x_hi = x >> 16;
+    uint32_t y_lo = y & 0x0000FFFFu;
+    uint32_t y_hi = y >> 16;
+
+    /* Exchange: low result = x.lo - y.hi, high result = x.hi + y.lo. */
+    uint32_t lo = (x_lo - y_hi) & 0x0000FFFFu;
+    uint32_t hi = (x_hi + y_lo) & 0x0000FFFFu;
+
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit signed addition and subtraction with halved results.
+  \details This function enables you to exchange the two halfwords of one operand, perform one
+           signed 16-bit integer addition and one signed 16-bit subtraction, and halve the results.
+  \param [in]    x   first 16-bit operands.
+  \param [in]    y   second 16-bit operands.
+  \return        the halved subtraction of the high halfword in the second operand from the
+                 low halfword in the first operand, in the low halfword of the return value.\n
+                 the halved addition of the low halfword in the second operand and the high
+                 halfword in the first operand, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = (val1[15:0]  - val2[31:16]) >> 1        \n
+                 res[31:16] = (val1[31:16] + val2[15:0]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __SHASX(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each 16-bit lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x_lo = (int32_t)(int16_t)x;
+    int32_t x_hi = (int32_t)(int16_t)(x >> 16);
+    int32_t y_lo = (int32_t)(int16_t)y;
+    int32_t y_hi = (int32_t)(int16_t)(y >> 16);
+
+    /* Exchange, then halve: low = (x.lo - y.hi) >> 1, high = (x.hi + y.lo) >> 1.
+       As before, this relies on arithmetic right shift of negative values. */
+    uint32_t lo = (uint32_t)((x_lo - y_hi) >> 1) & 0x0000FFFFu;
+    uint32_t hi = (uint32_t)((x_hi + y_lo) >> 1) & 0x0000FFFFu;
+
+    /* Pack as unsigned so that hi << 16 cannot overflow a signed type. */
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit unsigned addition and subtraction with halved results and exchange.
+  \details This function enables you to exchange the halfwords of the second operand,
+           add the high halfwords and subtract the low halfwords, halving the results.
+  \param [in]    x   first operand for the subtraction in the low halfword, and
+                     the first operand for the addition in the high halfword.
+  \param [in]    y   second operand for the subtraction in the high halfword, and
+                     the second operand for the addition in the low halfword.
+  \return        the halved subtraction of the high halfword in the second operand from the
+                 low halfword in the first operand, in the low halfword of the return value.\n
+                 the halved addition of the low halfword in the second operand and the high
+                 halfword in the first operand, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = (val1[15:0]  - val2[31:16]) >> 1        \n
+                 res[31:16] = (val1[31:16] + val2[15:0]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __UHASX(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final hi << 16 is an unsigned
+       shift; as an int32_t shift it would overflow for hi >= 0x8000. */
+    uint32_t x_lo = x & 0x0000FFFFu;
+    uint32_t x_hi = x >> 16;
+    uint32_t y_lo = y & 0x0000FFFFu;
+    uint32_t y_hi = y >> 16;
+
+    /* Exchange, then halve: low = (x.lo - y.hi) >> 1, high = (x.hi + y.lo) >> 1.
+       The subtraction wraps modulo 2^32 before the shift. */
+    uint32_t lo = ((x_lo - y_hi) >> 1) & 0x0000FFFFu;
+    uint32_t hi = ((x_hi + y_lo) >> 1) & 0x0000FFFFu;
+
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit subtract and add with exchange.
+  \details This function enables you to exchange the halfwords of one operand,
+           then subtract the high halfwords and add the low halfwords,
+           saturating the results to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
+  \param [in]    x   first operand for the addition in the low halfword,
+                     and the first operand for the subtraction in the high halfword.
+  \param [in]    y   second operand for the addition in the high halfword,
+                     and the second operand for the subtraction in the low halfword.
+  \return        the saturated addition of the low halfword of the first operand and the high
+                 halfword of the second operand, in the low halfword of the return value.\n
+                 the saturated subtraction of the low halfword of the second operand from the
+                 high halfword of the first operand, in the high halfword of the return value.\n
+                 The returned results are saturated to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
+  \remark
+                 res[15:0]  = val1[15:0]  + val2[31:16]        \n
+                 res[31:16] = val1[31:16] - val2[15:0]
+ */
+__ALWAYS_INLINE uint32_t __QSAX(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each 16-bit lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x_lo = (int32_t)(int16_t)x;
+    int32_t x_hi = (int32_t)(int16_t)(x >> 16);
+    int32_t y_lo = (int32_t)(int16_t)y;
+    int32_t y_hi = (int32_t)(int16_t)(y >> 16);
+
+    /* Exchange: low result = x.lo + y.hi, high result = x.hi - y.lo,
+       each clamped through __SSAT(..., 16). */
+    uint32_t lo = (uint32_t)(__SSAT((x_lo + y_hi), 16) & (int32_t)0x0000FFFF);
+    uint32_t hi = (uint32_t)(__SSAT((x_hi - y_lo), 16) & (int32_t)0x0000FFFF);
+
+    /* Pack as unsigned so that hi << 16 cannot overflow a signed type. */
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit unsigned saturating subtraction and addition with exchange.
+  \details This function enables you to exchange the halfwords of the second operand and perform
+           one unsigned 16-bit integer subtraction and one unsigned 16-bit addition, saturating
+           the results to the 16-bit unsigned integer range 0 <= x <= 2^16 - 1.
+  \param [in]    x   first operand for the addition in the low halfword,
+                     and the first operand for the subtraction in the high halfword.
+  \param [in]    y   second operand for the addition in the high halfword,
+                     and the second operand for the subtraction in the low halfword.
+  \return        the saturated addition of the low halfword of the first operand and the high
+                 halfword of the second operand, in the low halfword of the return value.\n
+                 the saturated subtraction of the low halfword of the second operand from the
+                 high halfword of the first operand, in the high halfword of the return value.\n
+                 The returned results are saturated to the 16-bit unsigned integer range 0 <= x <= 2^16 - 1.
+  \remark
+                 res[15:0]  = val1[15:0]  + val2[31:16]        \n
+                 res[31:16] = val1[31:16] - val2[15:0]
+ */
+__ALWAYS_INLINE uint32_t __UQSAX(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final hi << 16 is an unsigned
+       shift; as an int32_t shift it would overflow for hi >= 0x8000. */
+    uint32_t x_lo = x & 0x0000FFFFu;
+    uint32_t x_hi = x >> 16;
+    uint32_t y_lo = y & 0x0000FFFFu;
+    uint32_t y_hi = y >> 16;
+
+    /* Exchange: low result = x.lo + y.hi, high result = x.hi - y.lo,
+       each clamped through __IUSAT(..., 16). */
+    uint32_t lo = __IUSAT((x_lo + y_hi), 16) & 0x0000FFFF;
+    uint32_t hi = __IUSAT((x_hi - y_lo), 16) & 0x0000FFFF;
+
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit unsigned subtract and add with exchange.
+  \details This function enables you to exchange the halfwords of the second operand,
+           subtract the high halfwords and add the low halfwords.
+  \param [in]    x   first operand for the addition in the low halfword,
+                     and the first operand for the subtraction in the high halfword.
+  \param [in]    y   second operand for the addition in the high halfword,
+                     and the second operand for the subtraction in the low halfword.
+  \return        the addition of the low halfword of the first operand and the high
+                 halfword of the second operand, in the low halfword of the return value.\n
+                 the subtraction of the low halfword of the second operand from the
+                 high halfword of the first operand, in the high halfword of the return value.\n
+  \remark
+                 res[15:0]  = val1[15:0]  + val2[31:16]        \n
+                 res[31:16] = val1[31:16] - val2[15:0]
+ */
+__ALWAYS_INLINE uint32_t __USAX(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final hi << 16 is an unsigned
+       shift; as an int32_t shift it would overflow for hi >= 0x8000. */
+    uint32_t x_lo = x & 0x0000FFFFu;
+    uint32_t x_hi = x >> 16;
+    uint32_t y_lo = y & 0x0000FFFFu;
+    uint32_t y_hi = y >> 16;
+
+    /* Exchange: low result = x.lo + y.hi, high result = x.hi - y.lo. */
+    uint32_t lo = (x_lo + y_hi) & 0x0000FFFFu;
+    uint32_t hi = (x_hi - y_lo) & 0x0000FFFFu;
+
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit signed subtraction and addition with exchange.
+  \details This function enables you to exchange the two halfwords of one operand and perform one
+           16-bit integer subtraction and one 16-bit addition.
+  \param [in]    x   first operand for the addition in the low halfword, and the first operand
+                     for the subtraction in the high halfword.
+  \param [in]    y   second operand for the addition in the high halfword, and the second
+                     operand for the subtraction in the low halfword.
+  \return        the addition of the low halfword of the first operand and the high
+                 halfword of the second operand, in the low halfword of the return value.\n
+                 the subtraction of the low halfword of the second operand from the
+                 high halfword of the first operand, in the high halfword of the return value.\n
+  \remark
+                 res[15:0]  = val1[15:0]  + val2[31:16]        \n
+                 res[31:16] = val1[31:16] - val2[15:0]
+ */
+__ALWAYS_INLINE uint32_t __SSAX(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each 16-bit lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x_lo = (int32_t)(int16_t)x;
+    int32_t x_hi = (int32_t)(int16_t)(x >> 16);
+    int32_t y_lo = (int32_t)(int16_t)y;
+    int32_t y_hi = (int32_t)(int16_t)(y >> 16);
+
+    /* Exchange: low result = x.lo + y.hi, high result = x.hi - y.lo. */
+    uint32_t lo = (uint32_t)(x_lo + y_hi) & 0x0000FFFFu;
+    uint32_t hi = (uint32_t)(x_hi - y_lo) & 0x0000FFFFu;
+
+    /* Pack as unsigned so that hi << 16 cannot overflow a signed type. */
+    return ((hi << 16) | lo);
+}
+
+
+/**
+  \brief   Dual 16-bit signed subtraction and addition with halved results.
+  \details This function enables you to exchange the two halfwords of one operand, perform one signed
+           16-bit integer subtraction and one signed 16-bit addition, and halve the results.
+  \param [in]    x   first 16-bit operands.
+  \param [in]    y   second 16-bit operands.
+  \return        the halved addition of the low halfword in the first operand and the
+                 high halfword in the second operand, in the low halfword of the return value.\n
+                 the halved subtraction of the low halfword in the second operand from the
+                 high halfword in the first operand, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = (val1[15:0]  + val2[31:16]) >> 1        \n
+                 res[31:16] = (val1[31:16] - val2[15:0]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __SHSAX(uint32_t x, uint32_t y)
+{
+    /* Sign-extend each 16-bit lane with a narrowing cast instead of a signed
+       left shift, which overflows int32_t (undefined behaviour in ISO C). */
+    int32_t x_lo = (int32_t)(int16_t)x;
+    int32_t x_hi = (int32_t)(int16_t)(x >> 16);
+    int32_t y_lo = (int32_t)(int16_t)y;
+    int32_t y_hi = (int32_t)(int16_t)(y >> 16);
+
+    /* Exchange, then halve: low = (x.lo + y.hi) >> 1, high = (x.hi - y.lo) >> 1.
+       As before, this relies on arithmetic right shift of negative values. */
+    uint32_t lo = (uint32_t)((x_lo + y_hi) >> 1) & 0x0000FFFFu;
+    uint32_t hi = (uint32_t)((x_hi - y_lo) >> 1) & 0x0000FFFFu;
+
+    /* Pack as unsigned so that hi << 16 cannot overflow a signed type. */
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit unsigned subtraction and addition with halved results and exchange.
+  \details This function enables you to exchange the halfwords of the second operand,
+           subtract the high halfwords and add the low halfwords, halving the results.
+  \param [in]    x   first operand for the addition in the low halfword, and
+                     the first operand for the subtraction in the high halfword.
+  \param [in]    y   second operand for the addition in the high halfword, and
+                     the second operand for the subtraction in the low halfword.
+  \return        the halved addition of the low halfword in the first operand and the
+                 high halfword in the second operand, in the low halfword of the return value.\n
+                 the halved subtraction of the low halfword in the second operand from the
+                 high halfword in the first operand, in the high halfword of the return value.
+  \remark
+                 res[15:0]  = (val1[15:0]  + val2[31:16]) >> 1        \n
+                 res[31:16] = (val1[31:16] - val2[15:0]) >> 1
+ */
+__ALWAYS_INLINE uint32_t __UHSAX(uint32_t x, uint32_t y)
+{
+    /* Lanes are kept in uint32_t so that the final hi << 16 is an unsigned
+       shift; as an int32_t shift it would overflow for hi >= 0x8000. */
+    uint32_t x_lo = x & 0x0000FFFFu;
+    uint32_t x_hi = x >> 16;
+    uint32_t y_lo = y & 0x0000FFFFu;
+    uint32_t y_hi = y >> 16;
+
+    /* Exchange, then halve: low = (x.lo + y.hi) >> 1, high = (x.hi - y.lo) >> 1.
+       The subtraction wraps modulo 2^32 before the shift. */
+    uint32_t lo = ((x_lo + y_hi) >> 1) & 0x0000FFFFu;
+    uint32_t hi = ((x_hi - y_lo) >> 1) & 0x0000FFFFu;
+
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual 16-bit signed multiply with exchange returning difference.
+  \details This function exchanges the halfwords of the second operand, performs two
+           16-bit signed multiplications (bottom * top and top * bottom) and returns
+           the first product minus the second one.
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \return        the difference of the products of the two 16-bit signed multiplications.
+  \remark
+                 p1 = val1[15:0]  * val2[31:16]       \n
+                 p2 = val1[31:16] * val2[15:0]        \n
+                 res[31:0] = p1 - p2
+ */
+__ALWAYS_INLINE uint32_t __SMUSDX(uint32_t x, uint32_t y)
+{
+    int32_t x_lo = ((int32_t)x << 16) >> 16;    /* val1[15:0],  sign-extended */
+    int32_t x_hi = ((int32_t)x) >> 16;          /* val1[31:16], sign-extended */
+    int32_t y_lo = ((int32_t)y << 16) >> 16;    /* val2[15:0],  sign-extended */
+    int32_t y_hi = ((int32_t)y) >> 16;          /* val2[31:16], sign-extended */
+
+    return ((uint32_t)((x_lo * y_hi) - (x_hi * y_lo)));
+}
+
+/**
+  \brief   Sum of dual 16-bit signed multiply with exchange.
+  \details This function enables you to perform two 16-bit signed multiplications with exchanged
+           halfwords of the second operand, adding the products together.
+           The sum wraps modulo 2^32 if it does not fit (only when all four halfwords are 0x8000).
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \return        the sum of the products of the two 16-bit signed multiplications with exchanged halfwords of the second operand.
+  \remark
+                 p1 = val1[15:0]  * val2[31:16]       \n
+                 p2 = val1[31:16] * val2[15:0]        \n
+                 res[31:0] = p1 + p2
+ */
+__ALWAYS_INLINE uint32_t __SMUADX(uint32_t x, uint32_t y)
+{
+    /* Sign-extended halfwords (narrowing to int16_t keeps the low 16 bits). */
+    int32_t x_lo = (int16_t)(x & 0xFFFFU);
+    int32_t x_hi = (int16_t)(x >> 16);
+    int32_t y_lo = (int16_t)(y & 0xFFFFU);
+    int32_t y_hi = (int16_t)(y >> 16);
+
+    int32_t p1 = x_lo * y_hi;
+    int32_t p2 = x_hi * y_lo;
+
+    /* Add as unsigned: 0x40000000 + 0x40000000 would overflow a signed int. */
+    return ((uint32_t)p1 + (uint32_t)p2);
+}
+
+
+/**
+  \brief   Saturating add.
+  \details This function enables you to obtain the saturating add of two integers.
+           The result is clamped to the range [-2^31, 2^31 - 1].
+  \param [in]    x   first summand of the saturating add operation.
+  \param [in]    y   second summand of the saturating add operation.
+  \return        the saturating addition of val1 and val2.
+  \remark
+                 res[31:0] = SAT(val1 + val2)
+ */
+__ALWAYS_INLINE int32_t __QADD(int32_t x, int32_t y)
+{
+    /* Add in 64 bits: testing "x + y >= x" in 32 bits relies on signed overflow,
+       which is undefined and may be optimized away by the compiler. */
+    int64_t tmp = (int64_t)x + (int64_t)y;
+
+    if (tmp > 0x7fffffff)
+    {
+        return 0x7fffffff;
+    }
+
+    if (tmp < (-2147483647 - 1))
+    {
+        return (-2147483647 - 1);
+    }
+
+    return (int32_t)tmp;
+}
+
+/**
+  \brief   Saturating subtract.
+  \details This function enables you to obtain the saturating subtraction of two integers.
+           The result is clamped to the range [-2^31, 2^31 - 1].
+  \param [in]    x   minuend of the saturating subtract operation.
+  \param [in]    y   subtrahend of the saturating subtract operation.
+  \return        the saturating subtraction of val2 from val1.
+  \remark
+                 res[31:0] = SAT(val1 - val2)
+ */
+__ALWAYS_INLINE int32_t __QSUB(int32_t x, int32_t y)
+{
+    /* The exact difference always fits in 64 bits. */
+    int64_t diff = (int64_t)x - (int64_t)y;
+
+    if (diff > 0x7fffffff)
+    {
+        return 0x7fffffff;
+    }
+
+    if (diff < (-2147483647 - 1))
+    {
+        return (-2147483647 - 1);
+    }
+
+    return (int32_t)diff;
+}
+
+/**
+  \brief   Dual 16-bit signed multiply with single 32-bit accumulator.
+  \details This function enables you to perform two signed 16-bit multiplications,
+           adding both results to a 32-bit accumulate operand.
+           The result wraps modulo 2^32 if the accumulation does not fit in 32 bits.
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \param [in]  sum   accumulate value.
+  \return        the product of each multiplication added to the accumulate value, as a 32-bit integer.
+  \remark
+                 p1 = val1[15:0]  * val2[15:0]      \n
+                 p2 = val1[31:16] * val2[31:16]     \n
+                 res[31:0] = p1 + p2 + val3[31:0]
+ */
+__ALWAYS_INLINE uint32_t __SMLAD(uint32_t x, uint32_t y, uint32_t sum)
+{
+    /* Sign-extended halfwords (narrowing to int16_t keeps the low 16 bits). */
+    int32_t x_lo = (int16_t)(x & 0xFFFFU);
+    int32_t x_hi = (int16_t)(x >> 16);
+    int32_t y_lo = (int16_t)(y & 0xFFFFU);
+    int32_t y_hi = (int16_t)(y >> 16);
+
+    int32_t p1 = x_lo * y_lo;
+    int32_t p2 = x_hi * y_hi;
+
+    /* Accumulate as unsigned so that wrap-around is defined instead of signed overflow. */
+    return ((uint32_t)p1 + (uint32_t)p2 + sum);
+}
+
+/**
+  \brief   Pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator.
+  \details This function enables you to perform two signed 16-bit multiplications with exchanged
+           halfwords of the second operand, adding both results to a 32-bit accumulate operand.
+           The result wraps modulo 2^32 if the accumulation does not fit in 32 bits.
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \param [in]  sum   accumulate value.
+  \return        the product of each multiplication with exchanged halfwords of the second
+                 operand added to the accumulate value, as a 32-bit integer.
+  \remark
+                 p1 = val1[15:0]  * val2[31:16]     \n
+                 p2 = val1[31:16] * val2[15:0]      \n
+                 res[31:0] = p1 + p2 + val3[31:0]
+ */
+__ALWAYS_INLINE uint32_t __SMLADX(uint32_t x, uint32_t y, uint32_t sum)
+{
+    /* Sign-extended halfwords (narrowing to int16_t keeps the low 16 bits). */
+    int32_t x_lo = (int16_t)(x & 0xFFFFU);
+    int32_t x_hi = (int16_t)(x >> 16);
+    int32_t y_lo = (int16_t)(y & 0xFFFFU);
+    int32_t y_hi = (int16_t)(y >> 16);
+
+    int32_t p1 = x_lo * y_hi;
+    int32_t p2 = x_hi * y_lo;
+
+    /* Accumulate as unsigned so that wrap-around is defined instead of signed overflow. */
+    return ((uint32_t)p1 + (uint32_t)p2 + sum);
+}
+
+/**
+  \brief   Dual 16-bit signed multiply subtract with 32-bit accumulate.
+  \details This function enables you to perform two 16-bit signed multiplications, take the
+           difference of the products, subtracting the high halfword product from the low
+           halfword product, and add the difference to a 32-bit accumulate operand.
+           The result wraps modulo 2^32 if the accumulation does not fit in 32 bits.
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \param [in]  sum   accumulate value.
+  \return        the difference of the product of each multiplication, added to the accumulate value.
+  \remark
+                 p1 = val1[15:0]  * val2[15:0]       \n
+                 p2 = val1[31:16] * val2[31:16]      \n
+                 res[31:0] = p1 - p2 + val3[31:0]
+ */
+__ALWAYS_INLINE uint32_t __SMLSD(uint32_t x, uint32_t y, uint32_t sum)
+{
+    /* Sign-extended halfwords (narrowing to int16_t keeps the low 16 bits). */
+    int32_t x_lo = (int16_t)(x & 0xFFFFU);
+    int32_t x_hi = (int16_t)(x >> 16);
+    int32_t y_lo = (int16_t)(y & 0xFFFFU);
+    int32_t y_hi = (int16_t)(y >> 16);
+
+    /* p1 - p2 always fits in 32 bits; only the accumulation can wrap. */
+    int32_t diff = (x_lo * y_lo) - (x_hi * y_hi);
+
+    /* Accumulate as unsigned so that wrap-around is defined instead of signed overflow. */
+    return ((uint32_t)diff + sum);
+}
+
+/**
+  \brief   Dual 16-bit signed multiply with exchange subtract with 32-bit accumulate.
+  \details This function enables you to exchange the halfwords in the second operand, then perform two 16-bit
+           signed multiplications. The difference of the products is added to a 32-bit accumulate operand.
+           The result wraps modulo 2^32 if the accumulation does not fit in 32 bits.
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \param [in]  sum   accumulate value.
+  \return        the difference of the product of each multiplication, added to the accumulate value.
+  \remark
+                 p1 = val1[15:0]  * val2[31:16]     \n
+                 p2 = val1[31:16] * val2[15:0]      \n
+                 res[31:0] = p1 - p2 + val3[31:0]
+ */
+__ALWAYS_INLINE uint32_t __SMLSDX(uint32_t x, uint32_t y, uint32_t sum)
+{
+    /* Sign-extended halfwords (narrowing to int16_t keeps the low 16 bits). */
+    int32_t x_lo = (int16_t)(x & 0xFFFFU);
+    int32_t x_hi = (int16_t)(x >> 16);
+    int32_t y_lo = (int16_t)(y & 0xFFFFU);
+    int32_t y_hi = (int16_t)(y >> 16);
+
+    /* p1 - p2 always fits in 32 bits; only the accumulation can wrap. */
+    int32_t diff = (x_lo * y_hi) - (x_hi * y_lo);
+
+    /* Accumulate as unsigned so that wrap-around is defined instead of signed overflow. */
+    return ((uint32_t)diff + sum);
+}
+
+/**
+  \brief   Dual 16-bit signed multiply with single 64-bit accumulator.
+  \details This function enables you to perform two signed 16-bit multiplications, adding both results
+           to a 64-bit accumulate operand. Overflow is only possible as a result of the 64-bit addition.
+           This overflow is not detected if it occurs. Instead, the result wraps around modulo 2^64.
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \param [in]  sum   accumulate value.
+  \return        the product of each multiplication added to the accumulate value.
+  \remark
+                 p1 = val1[15:0]  * val2[15:0]      \n
+                 p2 = val1[31:16] * val2[31:16]     \n
+                 sum = p1 + p2 + val3[63:0]         \n
+                 res[63:32] = sum[63:32]            \n
+                 res[31:0]  = sum[31:0]
+ */
+__ALWAYS_INLINE uint64_t __SMLALD(uint32_t x, uint32_t y, uint64_t sum)
+{
+    /* Sign-extended halfwords (narrowing to int16_t keeps the low 16 bits). */
+    int32_t x_lo = (int16_t)(x & 0xFFFFU);
+    int32_t x_hi = (int16_t)(x >> 16);
+    int32_t y_lo = (int16_t)(y & 0xFFFFU);
+    int32_t y_hi = (int16_t)(y >> 16);
+
+    /* Widen before adding: p1 + p2 reaches 2^31 when every halfword is 0x8000,
+       which does not fit in a 32-bit signed intermediate. */
+    int64_t p1 = (int64_t)x_lo * y_lo;
+    int64_t p2 = (int64_t)x_hi * y_hi;
+
+    return ((uint64_t)(p1 + p2) + sum);
+}
+
+/**
+  \brief   Dual 16-bit signed multiply with exchange with single 64-bit accumulator.
+  \details This function enables you to exchange the halfwords of the second operand, and perform two
+           signed 16-bit multiplications, adding both results to a 64-bit accumulate operand. Overflow
+           is only possible as a result of the 64-bit addition. This overflow is not detected if it occurs.
+           Instead, the result wraps around modulo 2^64.
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \param [in]  sum   accumulate value.
+  \return        the product of each multiplication added to the accumulate value.
+  \remark
+                 p1 = val1[15:0]  * val2[31:16]     \n
+                 p2 = val1[31:16] * val2[15:0]      \n
+                 sum = p1 + p2 + val3[63:0]         \n
+                 res[63:32] = sum[63:32]            \n
+                 res[31:0]  = sum[31:0]
+ */
+__ALWAYS_INLINE uint64_t __SMLALDX(uint32_t x, uint32_t y, uint64_t sum)
+{
+    /* Sign-extended halfwords (narrowing to int16_t keeps the low 16 bits). */
+    int32_t x_lo = (int16_t)(x & 0xFFFFU);
+    int32_t x_hi = (int16_t)(x >> 16);
+    int32_t y_lo = (int16_t)(y & 0xFFFFU);
+    int32_t y_hi = (int16_t)(y >> 16);
+
+    /* Widen before adding: p1 + p2 reaches 2^31 when every halfword is 0x8000,
+       which does not fit in a 32-bit signed intermediate. */
+    int64_t p1 = (int64_t)x_lo * y_hi;
+    int64_t p2 = (int64_t)x_hi * y_lo;
+
+    return ((uint64_t)(p1 + p2) + sum);
+}
+
+/**
+  \brief   Dual 16-bit signed multiply subtract with 64-bit accumulate.
+  \details This function enables you to perform two 16-bit signed multiplications, take the difference
+           of the products, subtracting the high halfword product from the low halfword product, and add the
+           difference to a 64-bit accumulate operand. Overflow cannot occur during the multiplications or the
+           subtraction. Overflow can occur as a result of the 64-bit addition, and this overflow is not
+           detected. Instead, the result wraps round modulo 2^64.
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \param [in]  sum   accumulate value.
+  \return        the difference of the product of each multiplication, added to the accumulate value.
+  \remark
+                 p1 = val1[15:0]  * val2[15:0]      \n
+                 p2 = val1[31:16] * val2[31:16]     \n
+                 res[63:0] = p1 - p2 + val3[63:0]
+ */
+__ALWAYS_INLINE uint64_t __SMLSLD(uint32_t x, uint32_t y, uint64_t sum)
+{
+    int32_t x_lo = ((int32_t)x << 16) >> 16;    /* val1[15:0],  sign-extended */
+    int32_t x_hi = ((int32_t)x) >> 16;          /* val1[31:16], sign-extended */
+    int32_t y_lo = ((int32_t)y << 16) >> 16;    /* val2[15:0],  sign-extended */
+    int32_t y_hi = ((int32_t)y) >> 16;          /* val2[31:16], sign-extended */
+
+    /* 32-bit difference of the two products, widened for the accumulation. */
+    int32_t diff = (x_lo * y_lo) - (x_hi * y_hi);
+
+    return ((uint64_t)diff + sum);
+}
+
+/**
+  \brief   Dual 16-bit signed multiply with exchange subtract with 64-bit accumulate.
+  \details This function enables you to exchange the halfwords of the second operand, perform two 16-bit multiplications,
+           adding the difference of the products to a 64-bit accumulate operand. Overflow cannot occur during the
+           multiplications or the subtraction. Overflow can occur as a result of the 64-bit addition, and this overflow
+           is not detected. Instead, the result wraps round modulo 2^64.
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \param [in]  sum   accumulate value.
+  \return        the difference of the product of each multiplication, added to the accumulate value.
+  \remark
+                 p1 = val1[15:0]  * val2[31:16]      \n
+                 p2 = val1[31:16] * val2[15:0]       \n
+                 res[63:0] = p1 - p2 + val3[63:0]
+ */
+__ALWAYS_INLINE uint64_t __SMLSLDX(uint32_t x, uint32_t y, uint64_t sum)
+{
+    int32_t x_lo = ((int32_t)x << 16) >> 16;    /* val1[15:0],  sign-extended */
+    int32_t x_hi = ((int32_t)x) >> 16;          /* val1[31:16], sign-extended */
+    int32_t y_lo = ((int32_t)y << 16) >> 16;    /* val2[15:0],  sign-extended */
+    int32_t y_hi = ((int32_t)y) >> 16;          /* val2[31:16], sign-extended */
+
+    /* 32-bit difference of the two products, widened for the accumulation. */
+    int32_t diff = (x_lo * y_hi) - (x_hi * y_lo);
+
+    return ((uint64_t)diff + sum);
+}
+
+/**
+  \brief   32-bit signed multiply with 32-bit truncated accumulator.
+  \details This function enables you to perform a signed 32-bit multiplication, adding the most
+           significant 32 bits of the 64-bit result to a 32-bit accumulate operand.
+           The result wraps modulo 2^32 if the accumulation does not fit in 32 bits.
+  \param [in]    x   first operand for multiplication.
+  \param [in]    y   second operand for multiplication.
+  \param [in]  sum   accumulate value.
+  \return        the product of multiplication (most significant 32 bits) is added to the accumulate value, as a 32-bit integer.
+  \remark
+                 p = val1 * val2      \n
+                 res[31:0] = p[63:32] + val3[31:0]
+ */
+__ALWAYS_INLINE uint32_t __SMMLA(int32_t x, int32_t y, int32_t sum)
+{
+    int64_t product = (int64_t)x * (int64_t)y;
+    int32_t high    = (int32_t)(product >> 32);     /* p[63:32] */
+
+    /* Accumulate as unsigned so that wrap-around is defined instead of signed overflow. */
+    return ((uint32_t)high + (uint32_t)sum);
+}
+
+/**
+  \brief   Sum of dual 16-bit signed multiply.
+  \details This function enables you to perform two 16-bit signed multiplications, adding the products together.
+           The sum wraps modulo 2^32 if it does not fit (only when all four halfwords are 0x8000).
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \return        the sum of the products of the two 16-bit signed multiplications.
+  \remark
+                 p1 = val1[15:0]  * val2[15:0]      \n
+                 p2 = val1[31:16] * val2[31:16]     \n
+                 res[31:0] = p1 + p2
+ */
+__ALWAYS_INLINE uint32_t __SMUAD(uint32_t x, uint32_t y)
+{
+    /* Sign-extended halfwords (narrowing to int16_t keeps the low 16 bits). */
+    int32_t x_lo = (int16_t)(x & 0xFFFFU);
+    int32_t x_hi = (int16_t)(x >> 16);
+    int32_t y_lo = (int16_t)(y & 0xFFFFU);
+    int32_t y_hi = (int16_t)(y >> 16);
+
+    int32_t p1 = x_lo * y_lo;
+    int32_t p2 = x_hi * y_hi;
+
+    /* Add as unsigned: 0x40000000 + 0x40000000 would overflow a signed int. */
+    return ((uint32_t)p1 + (uint32_t)p2);
+}
+
+/**
+  \brief   Dual 16-bit signed multiply returning difference.
+  \details This function performs two 16-bit signed multiplications and returns the low
+           halfword product minus the high halfword product.
+  \param [in]    x   first 16-bit operands for each multiplication.
+  \param [in]    y   second 16-bit operands for each multiplication.
+  \return        the difference of the products of the two 16-bit signed multiplications.
+  \remark
+                 p1 = val1[15:0]  * val2[15:0]      \n
+                 p2 = val1[31:16] * val2[31:16]     \n
+                 res[31:0] = p1 - p2
+ */
+__ALWAYS_INLINE uint32_t __SMUSD(uint32_t x, uint32_t y)
+{
+    int32_t x_lo = ((int32_t)x << 16) >> 16;    /* val1[15:0],  sign-extended */
+    int32_t x_hi = ((int32_t)x) >> 16;          /* val1[31:16], sign-extended */
+    int32_t y_lo = ((int32_t)y << 16) >> 16;    /* val2[15:0],  sign-extended */
+    int32_t y_hi = ((int32_t)y) >> 16;          /* val2[31:16], sign-extended */
+
+    return ((uint32_t)((x_lo * y_lo) - (x_hi * y_hi)));
+}
+
+/**
+  \brief   Dual extracted 8-bit to 16-bit signed addition.
+  \details This function enables you to extract two 8-bit values from the second operand (at bit positions
+           [7:0] and [23:16]), sign-extend them to 16-bits each, and add the results to the first operand.
+           Each halfword sum wraps modulo 2^16.
+  \param [in]    x   values added to the sign-extended to 16-bit values.
+  \param [in]    y   two 8-bit values to be extracted and sign-extended.
+  \return        the addition of val1 and val2, where the 8-bit values in val2[7:0] and
+                 val2[23:16] have been extracted and sign-extended prior to the addition.
+  \remark
+                 res[15:0]  = val1[15:0] + SignExtended(val2[7:0])      \n
+                 res[31:16] = val1[31:16] + SignExtended(val2[23:16])
+ */
+__ALWAYS_INLINE uint32_t __SXTAB16(uint32_t x, uint32_t y)
+{
+    /* (b ^ 0x80) - 0x80 sign-extends an 8-bit field using unsigned arithmetic only,
+       avoiding signed overflow and left shifts of negative values. */
+    uint32_t y_b0 = ((y & 0xFFU) ^ 0x80U) - 0x80U;
+    uint32_t y_b2 = (((y >> 16) & 0xFFU) ^ 0x80U) - 0x80U;
+
+    /* Only the low 16 bits of each sum are kept, so the upper bits of x are irrelevant. */
+    uint32_t lo = (x + y_b0) & 0x0000FFFFU;
+    uint32_t hi = ((x >> 16) + y_b2) & 0x0000FFFFU;
+
+    return ((hi << 16) | lo);
+}
+
+/**
+  \brief   Dual extracted 8-bit to 16-bit unsigned addition.
+  \details This function enables you to extract two 8-bit values from one operand, zero-extend
+           them to 16 bits each, and add the results to two 16-bit values from another operand.
+  \param [in]    x   values added to the zero-extended to 16-bit values.
+  \param [in]    y   two 8-bit values to be extracted and zero-extended.
+  \return        the addition of val1 and val2, where the 8-bit values in val2[7:0] and
+                 val2[23:16] have been extracted and zero-extended prior to the addition.
+  \remark
+                 res[15:0]  = ZeroExt(val2[7:0]   to 16 bits) + val1[15:0]      \n
+                 res[31:16] = ZeroExt(val2[23:16] to 16 bits) + val1[31:16]
+ */
+__ALWAYS_INLINE uint32_t __UXTAB16(uint32_t x, uint32_t y)
+{
+    /* Low halfword: val2[7:0] + val1[15:0], truncated to 16 bits. */
+    uint32_t lo = ((y & 0x000000FFU) + (x & 0x0000FFFFU)) & 0x0000FFFFU;
+
+    /* High halfword: val2[15:0] rides along in the low bits but cannot carry into
+       bit 16 because the low halfword of the x term is zero; it is masked off below. */
+    uint32_t hi = ((y & 0x00FFFFFFU) + (x & 0xFFFF0000U)) & 0xFFFF0000U;
+
+    return (lo | hi);
+}
+
+/**
+  \brief   Dual extract 8-bits and sign extend each to 16-bits.
+  \details This function enables you to extract two 8-bit values from an operand and sign-extend them to 16 bits each.
+  \param [in]    x   two 8-bit values in val[7:0] and val[23:16] to be sign-extended.
+  \return        the 8-bit values sign-extended to 16-bit values.\n
+                 sign-extended value of val[7:0] in the low halfword of the return value.\n
+                 sign-extended value of val[23:16] in the high halfword of the return value.
+  \remark
+                 res[15:0]  = SignExtended(val[7:0])       \n
+                 res[31:16] = SignExtended(val[23:16])
+ */
+__ALWAYS_INLINE uint32_t __SXTB16(uint32_t x)
+{
+    /* val[7:0] sign-extended to 32 bits; the low halfword is kept. */
+    int32_t lo = (((int32_t)x << 24) >> 24) & (int32_t)0x0000FFFF;
+
+    /* val[23:0] sign-extended from bit 23; the high halfword then holds
+       the sign-extended val[23:16]. */
+    int32_t hi = (((int32_t)x <<  8) >>  8) & (int32_t)0xFFFF0000;
+
+    return ((uint32_t)(lo | hi));
+}
+
+/**
+  \brief   Dual extract 8-bits and zero-extend to 16-bits.
+  \details This function enables you to extract two 8-bit values from an operand and zero-extend them to 16 bits each.
+  \param [in]    x   two 8-bit values in val[7:0] and val[23:16] to be zero-extended.
+  \return        the 8-bit values zero-extended to 16-bit values.\n
+                 zero-extended value of val[7:0] in the low halfword of the return value.\n
+                 zero-extended value of val[23:16] in the high halfword of the return value.
+  \remark
+                 res[15:0]  = ZeroExtended(val[7:0])       \n
+                 res[31:16] = ZeroExtended(val[23:16])
+ */
+__ALWAYS_INLINE uint32_t __UXTB16(uint32_t x)
+{
+    /* Keeping bytes 0 and 2 and clearing bytes 1 and 3 is exactly the zero-extension
+       of each selected byte into its halfword. */
+    return (x & 0x00FF00FFU);
+}
+
+#endif /* _CSI_SIMD_H_ */