; mdct_arm.S - MDCT butterfly routines (ARM Thumb assembly, armasm syntax)

  ; Twiddle constants, Q31 fixed point (value / 2^31):
  ;   cPI3_8 ~= 0.38268 = cos(3*pi/8)
  ;   cPI2_8 ~= 0.70711 = cos(2*pi/8)
  ;   cPI1_8 ~= 0.92388 = cos(1*pi/8)
  ; The routines in this file multiply with SMULL/SMLAL, keep the high
  ; word of the 64-bit product and shift it left by one.
cPI3_8  EQU     0x30fbc54d
cPI2_8  EQU     0x5a82799a
cPI1_8  EQU     0x7641af3d

        AREA    |.text|, CODE, READONLY, ALIGN=2    ; read-only code, 4-byte aligned
        THUMB                                       ; Thumb instruction set, UAL syntax
        REQUIRE8                                    ; code expects an 8-byte aligned stack
        PRESERVE8                                   ; code declares it keeps that alignment
  ; ---------------------------------------------------------------------
  ; mdct_butterfly_8  (not exported; private register-based interface)
  ;
  ; In:    r0               = address that receives eight 32-bit results
  ;        r1, r2, r3, r4   = x0, x1, x2, x3
  ;        r5, r6, r10, r11 = x4, x5, x6, x7
  ; Out:   [r0 + 0 .. r0 + 28] = y0 .. y7
  ;        (the same values are left in r1-r6, r10, r11)
  ; Clobb: r1-r12.  r0, lr and the condition flags are not modified.
  ; Stack: none.
  ; ---------------------------------------------------------------------
mdct_butterfly_8 PROC
        ; Stage 1: sN = x(N+4) + xN goes to a scratch register,
        ;          dN = x(N+4) - xN overwrites the x(N+4) input.
        add     r12, r4, r11            ; s3 = x7 + x3
        sub     r11, r11, r4            ; d3 = x7 - x3
        add     r8, r3, r10             ; s2 = x6 + x2
        sub     r10, r10, r3            ; d2 = x6 - x2
        add     r7, r2, r6              ; s1 = x5 + x1
        sub     r6, r6, r2              ; d1 = x5 - x1
        add     r9, r1, r5              ; s0 = x4 + x0
        sub     r5, r5, r1              ; d0 = x4 - x0

        ; Stage 2: combine the differences (x0..x3 are dead, reuse r1-r4).
        add     r1, r6, r10             ; y0 = d2 + d1
        sub     r2, r11, r5             ; y1 = d3 - d0
        sub     r3, r10, r6             ; y2 = d2 - d1
        add     r4, r5, r11             ; y3 = d3 + d0

        ; Stage 2: combine the sums (d0..d3 are dead, reuse their registers).
        sub     r5, r8, r9              ; y4 = s2 - s0
        sub     r6, r12, r7             ; y5 = s3 - s1
        add     r10, r9, r8             ; y6 = s2 + s0
        add     r11, r7, r12            ; y7 = s3 + s1

        stm     r0, {r1-r6, r10, r11}   ; store y0..y7, r0 unchanged
        bx      lr
        ENDP
  ; ---------------------------------------------------------------------
  ; mdct_butterfly_16  (not exported; private interface)
  ;
  ; In:    r0 = address of 16 consecutive 32-bit words x[0..15],
  ;             transformed in place
  ; Out:   x[0..15] rewritten; r0 = entry value + 8*4 on return
  ; Clobb: r1-r12 (directly and through mdct_butterfly_8)
  ; Stack: one word (return address)
  ; ---------------------------------------------------------------------
mdct_butterfly_16 PROC
        push    {lr}
        add     r1, r0, #32             ; r1 = &x[8]
        ldr     r12, =cPI2_8            ; stays live for both halves below

        ; ---- x[0..3] against x[8..11] ----
        ldm     r0, {r2-r5}             ; r2..r5 = x0..x3
        ldm     r1, {r6-r9}             ; r6..r9 = x8..x11
        add     r6, r6, r2              ; y8  = x8  + x0
        rsb     r2, r6, r2, lsl #1      ; 2*x0 - y8  = x0 - x8
        add     r7, r7, r3              ; y9  = x9  + x1
        rsb     r3, r7, r3, lsl #1      ; 2*x1 - y9  = x1 - x9
        add     r8, r8, r4              ; y10 = x10 + x2
        sub     r11, r8, r4, lsl #1     ; y10 - 2*x2 = x10 - x2
        add     r9, r9, r5              ; y11 = x11 + x3
        rsb     r10, r9, r5, lsl #1     ; 2*x3 - y11 = x3 - x11
        stm     r1!, {r6-r9}            ; x[8..11] = y8..y11, r1 = &x[12]

        add     r2, r2, r3              ; a = (x0 - x8) + (x1 - x9)
        rsb     r3, r2, r3, lsl #1      ; b = (x1 - x9) - (x0 - x8)
        smull   r8, r5, r12, r2         ; r5 = high word of cPI2_8 * a
        smull   r8, r6, r12, r3         ; r6 = high word of cPI2_8 * b
        lsl     r5, r5, #1
        lsl     r6, r6, #1
        stm     r0!, {r5, r6, r10, r11} ; x[0..3] written, r0 = &x[4]

        ; ---- x[4..7] against x[12..15] ----
        ldm     r0, {r2-r5}             ; r2..r5 = x4..x7
        ldm     r1, {r6-r9}             ; r6..r9 = x12..x15
        add     r6, r6, r2              ; y12 = x12 + x4
        sub     r2, r6, r2, lsl #1      ; x12 - x4
        add     r7, r7, r3              ; y13 = x13 + x5
        sub     r3, r7, r3, lsl #1      ; x13 - x5
        add     r8, r8, r4              ; y14 = x14 + x6
        sub     r10, r8, r4, lsl #1     ; x14 - x6
        add     r9, r9, r5              ; y15 = x15 + x7
        sub     r11, r9, r5, lsl #1     ; x15 - x7
        stm     r1, {r6-r9}             ; x[12..15] = y12..y15

        sub     r2, r2, r3              ; a = (x12 - x4) - (x13 - x5)
        add     r3, r2, r3, lsl #1      ; b = (x12 - x4) + (x13 - x5)
        smull   r8, r5, r12, r2         ; r5 = high word of cPI2_8 * a
        smull   r8, r6, r12, r3         ; r6 = high word of cPI2_8 * b
        lsl     r5, r5, #1
        lsl     r6, r6, #1

        ; The four values in r5, r6, r10, r11 are not stored: they are
        ; handed to mdct_butterfly_8 directly as its x4..x7 inputs.
        sub     r0, r0, #16             ; r0 = &x[0]
        ldm     r0, {r1-r4}             ; x0..x3 inputs = words written above
        bl      mdct_butterfly_8        ; rewrites x[0..7]

        add     r0, r0, #32             ; r0 = &x[8]
        ldm     r0, {r1-r6, r10, r11}   ; all eight inputs from x[8..15]
        bl      mdct_butterfly_8        ; rewrites x[8..15]

        pop     {pc}
        ENDP
  ; ---------------------------------------------------------------------
  ; mdct_butterfly_32  (exported)
  ;
  ; In:    r0 = address of 32 consecutive 32-bit words x[0..31],
  ;             transformed in place
  ; Saves: r4-r11 and lr are pushed on entry and restored on return
  ; Clobb: r0-r3, r12, flags unchanged
  ; Stack: 9 words, plus 1 word around the first mdct_butterfly_16 call
  ;
  ; Four passes each pair 4 low words with the 4 words 16 positions
  ; higher: the sums go to the upper half, rotated/scaled differences to
  ; the lower half.  Both 16-word halves are then passed to
  ; mdct_butterfly_16.
  ; ---------------------------------------------------------------------
mdct_butterfly_32 PROC
        EXPORT  mdct_butterfly_32
        push    {r4-r11, lr}

        ; ---- pass 1: x[0..3] with x[16..19] ----
        add     r1, r0, #16*4           ; r1 = &x[16]
        ldm     r0, {r2-r5}             ; r2..r5 = x0..x3
        ldm     r1, {r6-r9}             ; r6..r9 = x16..x19
        add     r6, r6, r2              ; y16 = x16 + x0
        rsb     r2, r6, r2, lsl #1      ; d0  = x0 - x16
        add     r7, r7, r3              ; y17 = x17 + x1
        rsb     r3, r7, r3, lsl #1      ; d1  = x1 - x17
        add     r8, r8, r4              ; y18 = x18 + x2
        rsb     r4, r8, r4, lsl #1      ; d2  = x2 - x18
        add     r9, r9, r5              ; y19 = x19 + x3
        rsb     r5, r9, r5, lsl #1      ; d3  = x3 - x19
        stm     r1!, {r6-r9}            ; x[16..19] = sums, r1 = &x[20]

        ldr     r12, =cPI1_8            ; r12 and lr keep these two
        ldr     lr, =cPI3_8             ; constants for all four passes
        smull   r10, r6, r12, r2        ; r6:r10  = cPI1_8 * d0
        rsb     r2, r2, #0              ; r2 = -d0
        smlal   r10, r6, lr, r3         ; r6:r10 += cPI3_8 * d1
        smull   r10, r7, r12, r3        ; r7:r10  = cPI1_8 * d1
        smlal   r10, r7, lr, r2         ; r7:r10 -= cPI3_8 * d0
        lsl     r6, r6, #1
        lsl     r7, r7, #1

        add     r4, r4, r5              ; (x3 - x19) + (x2 - x18)
        rsb     r5, r4, r5, lsl #1      ; (x3 - x19) - (x2 - x18)
        ldr     r11, =cPI2_8
        smull   r10, r8, r11, r4        ; r8 = high word of cPI2_8 * r4
        smull   r10, r9, r11, r5        ; r9 = high word of cPI2_8 * r5
        lsl     r8, r8, #1
        lsl     r9, r9, #1
        stm     r0!, {r6-r9}            ; x[0..3] written, r0 = &x[4]

        ; ---- pass 2: x[4..7] with x[20..23] ----
        ldm     r0, {r2-r5}             ; r2..r5 = x4..x7
        ldm     r1, {r6-r9}             ; r6..r9 = x20..x23
        add     r6, r6, r2              ; y20 = x20 + x4
        rsb     r2, r6, r2, lsl #1      ; d0  = x4 - x20
        add     r7, r7, r3              ; y21 = x21 + x5
        rsb     r3, r7, r3, lsl #1      ; d1  = x5 - x21
        add     r8, r8, r4              ; y22 = x22 + x6
        sub     r4, r8, r4, lsl #1      ; d2  = x22 - x6  (opposite sign)
        add     r9, r9, r5              ; y23 = x23 + x7
        rsb     r5, r9, r5, lsl #1      ; d3  = x7 - x23
        stm     r1!, {r6-r9}            ; x[20..23] = sums, r1 = &x[24]

        smull   r10, r6, lr, r2         ; r6:r10  = cPI3_8 * d0
        rsb     r2, r2, #0              ; r2 = -d0
        smlal   r10, r6, r12, r3        ; r6:r10 += cPI1_8 * d1
        smull   r10, r7, lr, r3         ; r7:r10  = cPI3_8 * d1
        smlal   r10, r7, r12, r2        ; r7:r10 -= cPI1_8 * d0
        lsl     r6, r6, #1
        lsl     r7, r7, #1
        mov     r8, r5                  ; third word  = x7 - x23
        mov     r9, r4                  ; fourth word = x22 - x6
        stm     r0!, {r6-r9}            ; x[4..7] written, r0 = &x[8]

        ; ---- pass 3: x[8..11] with x[24..27] ----
        ldm     r0, {r2-r5}             ; r2..r5 = x8..x11
        ldm     r1, {r6-r9}             ; r6..r9 = x24..x27
        add     r6, r6, r2              ; y24 = x24 + x8
        sub     r2, r6, r2, lsl #1      ; d0  = x24 - x8
        add     r7, r7, r3              ; y25 = x25 + x9
        sub     r3, r7, r3, lsl #1      ; d1  = x25 - x9
        add     r8, r8, r4              ; y26 = x26 + x10
        sub     r4, r8, r4, lsl #1      ; d2  = x26 - x10
        add     r9, r9, r5              ; y27 = x27 + x11
        sub     r5, r9, r5, lsl #1      ; d3  = x27 - x11
        stm     r1!, {r6-r9}            ; x[24..27] = sums, r1 = &x[28]

        smull   r10, r7, lr, r3         ; r7:r10  = cPI3_8 * d1
        rsb     r3, r3, #0              ; r3 = -d1
        smlal   r10, r7, r12, r2        ; r7:r10 += cPI1_8 * d0
        smull   r10, r6, r12, r3        ; r6:r10  = -(cPI1_8 * d1)
        smlal   r10, r6, lr, r2         ; r6:r10 += cPI3_8 * d0
        lsl     r6, r6, #1
        lsl     r7, r7, #1

        sub     r4, r4, r5              ; (x26 - x10) - (x27 - x11)
        add     r5, r4, r5, lsl #1      ; (x26 - x10) + (x27 - x11)
        ldr     r11, =cPI2_8            ; r11 is not written between the two
                                        ; loads, so this reloads the same value
        smull   r10, r8, r11, r4        ; r8 = high word of cPI2_8 * r4
        smull   r10, r9, r11, r5        ; r9 = high word of cPI2_8 * r5
        lsl     r8, r8, #1
        lsl     r9, r9, #1
        stm     r0!, {r6-r9}            ; x[8..11] written, r0 = &x[12]

        ; ---- pass 4: x[12..15] with x[28..31] ----
        ldm     r0, {r2-r5}             ; r2..r5 = x12..x15
        ldm     r1, {r6-r9}             ; r6..r9 = x28..x31
        add     r6, r6, r2              ; y28 = x28 + x12
        sub     r2, r6, r2, lsl #1      ; d0  = x28 - x12
        add     r7, r7, r3              ; y29 = x29 + x13
        sub     r3, r7, r3, lsl #1      ; d1  = x29 - x13
        add     r8, r8, r4              ; y30 = x30 + x14
        sub     r4, r8, r4, lsl #1      ; d2  = x30 - x14
        add     r9, r9, r5              ; y31 = x31 + x15
        sub     r5, r9, r5, lsl #1      ; d3  = x31 - x15
        stm     r1, {r6-r9}             ; x[28..31] = sums

        smull   r10, r7, r12, r3        ; r7:r10  = cPI1_8 * d1
        rsb     r3, r3, #0              ; r3 = -d1
        smlal   r10, r7, lr, r2         ; r7:r10 += cPI3_8 * d0
        smull   r10, r6, lr, r3         ; r6:r10  = -(cPI3_8 * d1)
        smlal   r10, r6, r12, r2        ; r6:r10 += cPI1_8 * d0
        lsl     r6, r6, #1
        lsl     r7, r7, #1
        mov     r8, r4                  ; third word  = x30 - x14
        mov     r9, r5                  ; fourth word = x31 - x15
        stm     r0, {r6-r9}             ; x[12..15] written

        ; ---- finish each 16-word half ----
        sub     r0, r0, #12*4           ; r0 = &x[0]
        push    {r0}                    ; mdct_butterfly_16 changes r0
        bl      mdct_butterfly_16       ; x[0..15]
        pop     {r0}
        add     r0, r0, #16*4           ; r0 = &x[16]
        bl      mdct_butterfly_16       ; x[16..31]

        pop     {r4-r11, pc}
        ENDP
  ; ---------------------------------------------------------------------
  ; mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop)   (exported)
  ;
  ; In:    r0   = x1, r1 = x2 : pointers into 32-bit data; both are read
  ;                             and written at negative offsets and walk
  ;                             downwards, 4 words per loop iteration
  ;        r2   = T0          : pointer to 32-bit coefficient pairs
  ;        r3   = step        : coefficient stride in words (scaled by 4)
  ;        [sp] = Ttop        : fifth argument, passed on the stack
  ; Saves: r4-r11 and lr are pushed on entry and restored on return
  ; Stack: 10 words = 9 saved registers + 1 local slot holding T0
  ;
  ; Four loops run back to back.  The coefficient pointer T (r2) moves
  ; T0 -> Ttop, Ttop -> T0, T0 -> Ttop, Ttop -> T0.  Every loop tests its
  ; bound at the bottom, so each body executes at least once.
  ;
  ; Per iteration, with a = x1[-4..-1] and b = x2[-4..-1]:
  ;   x1[-4..-1] = a + b
  ;   x2[-4..-1] = differences of a and b rotated by the pair (T[0], T[1]);
  ;                each output is the high word of the 64-bit sum of two
  ;                products, shifted left by one.
  ; The loops differ only in the signs of the differences and in which
  ; product pair lands in which output word.
  ;
  ; T0 is kept in an allocated stack slot.  Keeping it at [sp, #-4]
  ; without moving sp would leave it in memory this function does not own:
  ; anything that pushes onto the same stack asynchronously (for example
  ; exception entry on cores that stack the exception frame on the current
  ; stack) could overwrite it before it is reloaded.
  ; ---------------------------------------------------------------------
GL_T0_SLOT      EQU     0               ; [sp + 0]  : saved T0
GL_TTOP_SLOT    EQU     40              ; [sp + 40] : Ttop (1 local + 9 saved words)

mdct_butterfly_generic_loop PROC
        EXPORT  mdct_butterfly_generic_loop
        stmdb   sp!, {r4-r11, lr}
        str     r2, [sp, #-4]!          ; allocate the T0 slot and fill it
        ldr     r4, [sp, #GL_TTOP_SLOT] ; r4 = Ttop, upper bound for loop 1

        ; ---- loop 1: T ascending, until T >= Ttop (unsigned) ----
label_1
        ldmdb   r0, {r6, r7, r8, r9}            ; a0..a3 = x1[-4..-1]
        ldmdb   r1, {r10, r11, r12, r14}        ; b0..b3 = x2[-4..-1]
        add     r6, r6, r10
        sub     r10, r6, r10, lsl #1            ; r10 = a0 - b0
        add     r7, r7, r11
        rsb     r11, r7, r11, lsl #1            ; r11 = b1 - a1
        add     r8, r8, r12
        sub     r12, r8, r12, lsl #1            ; r12 = a2 - b2
        add     r9, r9, r14
        rsb     r14, r9, r14, lsl #1            ; r14 = b3 - a3
        stmdb   r0!, {r6, r7, r8, r9}           ; x1[-4..-1] = a + b, x1 -= 4

        ldmia   r2, {r6, r7}                    ; r6 = T[0], r7 = T[1]
        smull   r5, r8, r6, r14
        rsb     r14, r14, #0
        smlal   r5, r8, r7, r12                 ; r8 = hi(T[0]*r14 + T[1]*r12)
        smull   r5, r9, r6, r12
        smlal   r5, r9, r7, r14                 ; r9 = hi(T[0]*r12 - T[1]*r14)
        lsl     r8, r8, #1
        lsl     r9, r9, #1
        stmdb   r1!, {r8, r9}                   ; x2[-2], x2[-1]; x2 -= 2
        add     r2, r2, r3, lsl #2              ; T += step

        ldmia   r2, {r6, r7}
        smull   r5, r8, r6, r11
        rsb     r11, r11, #0
        smlal   r5, r8, r7, r10                 ; r8 = hi(T[0]*r11 + T[1]*r10)
        smull   r5, r9, r6, r10
        smlal   r5, r9, r7, r11                 ; r9 = hi(T[0]*r10 - T[1]*r11)
        lsl     r8, r8, #1
        lsl     r9, r9, #1
        stmdb   r1!, {r8, r9}                   ; next two words down; x2 -= 2
        add     r2, r2, r3, lsl #2              ; T += step
        cmp     r2, r4
        blo     label_1

        ldr     r4, [sp, #GL_T0_SLOT]           ; r4 = T0, lower bound for loop 2

        ; ---- loop 2: T descending, until T <= T0 (unsigned) ----
label_2
        ldmdb   r0, {r6, r7, r8, r9}
        ldmdb   r1, {r10, r11, r12, r14}
        add     r6, r6, r10
        sub     r10, r6, r10, lsl #1            ; r10 = a0 - b0
        add     r7, r7, r11
        sub     r11, r7, r11, lsl #1            ; r11 = a1 - b1
        add     r8, r8, r12
        sub     r12, r8, r12, lsl #1            ; r12 = a2 - b2
        add     r9, r9, r14
        sub     r14, r9, r14, lsl #1            ; r14 = a3 - b3
        stmdb   r0!, {r6, r7, r8, r9}

        ldmia   r2, {r6, r7}
        smull   r5, r9, r6, r14
        rsb     r14, r14, #0
        smlal   r5, r9, r7, r12                 ; r9 = hi(T[0]*r14 + T[1]*r12)
        smull   r5, r8, r6, r12
        smlal   r5, r8, r7, r14                 ; r8 = hi(T[0]*r12 - T[1]*r14)
        lsl     r8, r8, #1
        lsl     r9, r9, #1
        stmdb   r1!, {r8, r9}
        sub     r2, r2, r3, lsl #2              ; T -= step

        ldmia   r2, {r6, r7}
        smull   r5, r9, r6, r11
        rsb     r11, r11, #0
        smlal   r5, r9, r7, r10                 ; r9 = hi(T[0]*r11 + T[1]*r10)
        smull   r5, r8, r6, r10
        smlal   r5, r8, r7, r11                 ; r8 = hi(T[0]*r10 - T[1]*r11)
        lsl     r8, r8, #1
        lsl     r9, r9, #1
        stmdb   r1!, {r8, r9}
        sub     r2, r2, r3, lsl #2              ; T -= step
        cmp     r2, r4
        bhi     label_2

        ldr     r4, [sp, #GL_TTOP_SLOT]         ; r4 = Ttop, upper bound for loop 3

        ; ---- loop 3: T ascending, until T >= Ttop (unsigned) ----
label_3
        ldmdb   r0, {r6, r7, r8, r9}
        ldmdb   r1, {r10, r11, r12, r14}
        add     r6, r6, r10
        rsb     r10, r6, r10, lsl #1            ; r10 = b0 - a0
        add     r7, r7, r11
        rsb     r11, r7, r11, lsl #1            ; r11 = b1 - a1
        add     r8, r8, r12
        rsb     r12, r8, r12, lsl #1            ; r12 = b2 - a2
        add     r9, r9, r14
        rsb     r14, r9, r14, lsl #1            ; r14 = b3 - a3
        stmdb   r0!, {r6, r7, r8, r9}

        ldmia   r2, {r6, r7}
        smull   r5, r8, r6, r12
        rsb     r12, r12, #0
        smlal   r5, r8, r7, r14                 ; r8 = hi(T[0]*r12 + T[1]*r14)
        smull   r5, r9, r6, r14
        smlal   r5, r9, r7, r12                 ; r9 = hi(T[0]*r14 - T[1]*r12)
        lsl     r8, r8, #1
        lsl     r9, r9, #1
        stmdb   r1!, {r8, r9}
        add     r2, r2, r3, lsl #2              ; T += step

        ldmia   r2, {r6, r7}
        smull   r5, r8, r6, r10
        rsb     r10, r10, #0
        smlal   r5, r8, r7, r11                 ; r8 = hi(T[0]*r10 + T[1]*r11)
        smull   r5, r9, r6, r11
        smlal   r5, r9, r7, r10                 ; r9 = hi(T[0]*r11 - T[1]*r10)
        lsl     r8, r8, #1
        lsl     r9, r9, #1
        stmdb   r1!, {r8, r9}
        add     r2, r2, r3, lsl #2              ; T += step
        cmp     r2, r4
        blo     label_3

        ldr     r4, [sp, #GL_T0_SLOT]           ; r4 = T0, lower bound for loop 4

        ; ---- loop 4: T descending, until T <= T0 (unsigned) ----
label_4
        ldmdb   r0, {r6, r7, r8, r9}
        ldmdb   r1, {r10, r11, r12, r14}
        add     r6, r6, r10
        sub     r10, r6, r10, lsl #1            ; r10 = a0 - b0
        add     r7, r7, r11
        rsb     r11, r7, r11, lsl #1            ; r11 = b1 - a1
        add     r8, r8, r12
        sub     r12, r8, r12, lsl #1            ; r12 = a2 - b2
        add     r9, r9, r14
        rsb     r14, r9, r14, lsl #1            ; r14 = b3 - a3
        stmdb   r0!, {r6, r7, r8, r9}

        ldmia   r2, {r6, r7}
        smull   r5, r9, r6, r12
        smlal   r5, r9, r7, r14                 ; r9 = hi(T[0]*r12 + T[1]*r14)
        rsb     r12, r12, #0
        smull   r5, r8, r6, r14
        smlal   r5, r8, r7, r12                 ; r8 = hi(T[0]*r14 - T[1]*r12)
        lsl     r8, r8, #1
        lsl     r9, r9, #1
        stmdb   r1!, {r8, r9}
        sub     r2, r2, r3, lsl #2              ; T -= step

        ldmia   r2, {r6, r7}
        smull   r5, r9, r6, r10
        rsb     r10, r10, #0
        smlal   r5, r9, r7, r11                 ; r9 = hi(T[0]*r10 + T[1]*r11)
        smull   r5, r8, r6, r11
        smlal   r5, r8, r7, r10                 ; r8 = hi(T[0]*r11 - T[1]*r10)
        lsl     r8, r8, #1
        lsl     r9, r9, #1
        stmdb   r1!, {r8, r9}
        sub     r2, r2, r3, lsl #2              ; T -= step
        cmp     r2, r4
        bhi     label_4

        add     sp, sp, #4                      ; release the T0 slot
        ldmia   sp!, {r4-r11, pc}
        ENDP
  ; End of the assembly source.
        END