123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415 |
; Fixed-point multiplier constants used by the butterfly routines in this file.
; Divided by 2^31 they are approximately 0.38268, 0.70711 and 0.92388, which
; matches cos(3*pi/8), cos(2*pi/8) and cos(pi/8) -- presumably the intent,
; given the names.  The routines multiply with SMULL/SMLAL, keep the high
; 32 bits of the 64-bit product and shift that left by one.
- cPI3_8 EQU 0x30fbc54d ; ~0.38268 * 2^31
- cPI2_8 EQU 0x5a82799a ; ~0.70711 * 2^31
- cPI1_8 EQU 0x7641af3d ; ~0.92388 * 2^31
- AREA |.text|, CODE, READONLY, ALIGN=2 ; read-only code area, aligned to 2^2 bytes
- THUMB ; instructions below are assembled as Thumb
- REQUIRE8 ; this code requires an 8-byte aligned stack
- PRESERVE8 ; this code declares that it preserves 8-byte stack alignment
; ----------------------------------------------------------------------
; mdct_butterfly_8 -- 8-point butterfly on values already held in registers.
; Not EXPORTed; called only from mdct_butterfly_16 and it does NOT use the
; standard argument registers.
;   In:    r0        = address the eight result words are written to
;          r1..r4    = x0..x3
;          r5, r6    = x4, x5
;          r10, r11  = x6, x7
;   Out:   [r0 .. r0+28] = y0..y7 (r0 itself is unchanged: no writeback)
;   Clobb: r1-r12
;   Stack: none; returns with bx lr.
; ----------------------------------------------------------------------
- mdct_butterfly_8 PROC
- add r9, r5, r1 ; x4 + x0
- sub r5, r5, r1 ; x4 - x0
- add r7, r6, r2 ; x5 + x1
- sub r6, r6, r2 ; x5 - x1
- add r8, r10, r3 ; x6 + x2
- sub r10, r10, r3 ; x6 - x2
- add r12, r11, r4 ; x7 + x3
- sub r11, r11, r4 ; x7 - x3
- add r1, r10, r6 ; y0 = (x6 - x2) + (x5 - x1)
- sub r2, r11, r5 ; y1 = (x7 - x3) - (x4 - x0)
- sub r3, r10, r6 ; y2 = (x6 - x2) - (x5 - x1)
- add r4, r11, r5 ; y3 = (x7 - x3) + (x4 - x0)
- sub r5, r8, r9 ; y4 = (x6 + x2) - (x4 + x0)
- sub r6, r12, r7 ; y5 = (x7 + x3) - (x5 + x1)
- add r10, r8, r9 ; y6 = (x6 + x2) + (x4 + x0)
- add r11, r12, r7 ; y7 = (x7 + x3) + (x5 + x1)
- stmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} ; write y0..y7 in ascending address order
- bx lr ; return to caller
-
- ENDP
; ----------------------------------------------------------------------
; mdct_butterfly_16 -- in-place 16-point butterfly on the 16 words at r0.
; Not EXPORTed; called from mdct_butterfly_32.
;   In:    r0 = x, address of 16 consecutive 32-bit words
;   Out:   x[0..15] updated in place; r0 = &x[8] on return (not restored)
;   Clobb: r1-r12.  r4-r11 are NOT saved here; the caller in this file
;          (mdct_butterfly_32) has already saved them.
;   Stack: one word (lr), popped straight into pc on return.
; The first half adds/subtracts x[0..7] against x[8..15]: sums go to the
; upper half, differences (two of each four scaled by cPI2_8) become the
; new lower half.  Two mdct_butterfly_8 passes then finish each half.
; ----------------------------------------------------------------------
- mdct_butterfly_16 PROC
- str lr, [sp, #-4]! ; push lr; the bl instructions below overwrite it
- add r1, r0, #8*4 ; r1 = &x[8]
- ldmia r0, {r2, r3, r4, r5} ; r2..r5 = x0..x3
- ldmia r1, {r6, r7, r8, r9} ; r6..r9 = x8..x11
- add r6, r6, r2 ; y8 = x8 + x0
- rsb r2, r6, r2, lsl #1 ; x0 - x8
- add r7, r7, r3 ; y9 = x9 + x1
- rsb r3, r7, r3, lsl #1 ; x1 - x9
- add r8, r8, r4 ; y10 = x10 + x2
- sub r11, r8, r4, lsl #1 ; x10 - x2
- add r9, r9, r5 ; y11 = x11 + x3
- rsb r10, r9, r5, lsl #1 ; x3 - x11
- stmia r1!, {r6, r7, r8, r9} ; store y8..y11; writeback leaves r1 = &x[12]
- add r2, r2, r3 ; (x0 - x8) + (x1 - x9)
- rsb r3, r2, r3, lsl #1 ; (x1 - x9) - (x0 - x8)
- ldr r12, =cPI2_8 ; r12 = cPI2_8 (literal load); stays live for the second half
- smull r8, r5, r12, r2 ; r5 = high word of cPI2_8 * sum (low word in r8 is discarded)
- smull r8, r6, r12, r3 ; r6 = high word of cPI2_8 * difference
- mov r5, r5, lsl #1 ; high word << 1
- mov r6, r6, lsl #1
- stmia r0!, {r5, r6, r10, r11} ; x0..x3 = two products, (x3 - x11), (x10 - x2); r0 = &x[4]
- ldmia r0, {r2, r3, r4, r5} ; r2..r5 = x4..x7
- ldmia r1, {r6, r7, r8, r9} ; r6..r9 = x12..x15
- add r6, r6, r2 ; y12 = x12 + x4
- sub r2, r6, r2, lsl #1 ; x12 - x4
- add r7, r7, r3 ; y13 = x13 + x5
- sub r3, r7, r3, lsl #1 ; x13 - x5
- add r8, r8, r4 ; y14 = x14 + x6
- sub r10, r8, r4, lsl #1 ; x14 - x6
- add r9, r9, r5 ; y15 = x15 + x7
- sub r11, r9, r5, lsl #1 ; x15 - x7
- stmia r1, {r6, r7, r8, r9} ; store y12..y15 (no writeback)
- sub r2, r2, r3 ; (x12 - x4) - (x13 - x5)
- add r3, r2, r3, lsl #1 ; (x12 - x4) + (x13 - x5)
- smull r8, r5, r12, r2 ; r5 = high word of cPI2_8 * difference
- smull r8, r6, r12, r3 ; r6 = high word of cPI2_8 * sum
- mov r5, r5, lsl #1 ; high word << 1
- mov r6, r6, lsl #1
- ; no stmia here, r5, r6, r10, r11 are passed to mdct_butterfly_8
- sub r0, r0, #4*4 ; r0 back to &x[0]
- ldmia r0, {r1, r2, r3, r4} ; reload the x0..x3 values stored above
- bl mdct_butterfly_8 ; butterfly on x[0..7], results stored at r0
- add r0, r0, #8*4 ; r0 = &x[8]
- ldmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} ; load x8..x15 into the registers mdct_butterfly_8 expects
- bl mdct_butterfly_8 ; butterfly on x[8..15]
- ldr pc, [sp], #4 ; pop the saved lr into pc: return
- ENDP
; ----------------------------------------------------------------------
; mdct_butterfly_32 -- in-place 32-point butterfly (EXPORTed entry point).
;   In:    r0 = x, address of 32 consecutive 32-bit words
;   Out:   x[0..31] updated in place
;   Saved: r4-r11 and lr are pushed on entry and restored on return
;   Clobb: r0-r3, r12
; The body handles x in four groups of four words.  For each group the
; sums x[i] + x[i+16] are written to the upper half, and the differences
; are rotated by the cPI1_8 / cPI3_8 pair, scaled by cPI2_8 or stored
; as-is, and written to the lower half.  Each half is then finished by
; mdct_butterfly_16.  Products keep the high 32 bits of the 64-bit
; SMULL/SMLAL result (r10 takes the unused low word) and shift them left by 1.
; ----------------------------------------------------------------------
- mdct_butterfly_32 PROC
- EXPORT mdct_butterfly_32
- stmdb sp!, {r4-r11, lr} ; save callee-saved registers and return address (9 words)
- add r1, r0, #16*4 ; r1 = &x[16]
- ldmia r0, {r2, r3, r4, r5} ; r2..r5 = x0..x3
- ldmia r1, {r6, r7, r8, r9} ; r6..r9 = x16..x19
- add r6, r6, r2 ; y16 = x16 + x0
- rsb r2, r6, r2, lsl #1 ; x0 - x16
- add r7, r7, r3 ; y17 = x17 + x1
- rsb r3, r7, r3, lsl #1 ; x1 - x17
- add r8, r8, r4 ; y18 = x18 + x2
- rsb r4, r8, r4, lsl #1 ; x2 - x18
- add r9, r9, r5 ; y19 = x19 + x3
- rsb r5, r9, r5, lsl #1 ; x3 - x19
- stmia r1!, {r6, r7, r8, r9} ; store y16..y19; r1 = &x[20]
- ldr r12, =cPI1_8 ; r12 = cPI1_8, live until the final group
- ldr lr, =cPI3_8 ; lr = cPI3_8 (lr is free: it was saved on entry)
- smull r10, r6, r12, r2 ; r10:r6 = cPI1_8 * (x0 - x16)
- rsb r2, r2, #0 ; r2 = -(x0 - x16)
- smlal r10, r6, lr, r3 ; r10:r6 += cPI3_8 * (x1 - x17)
- smull r10, r7, r12, r3 ; r10:r7 = cPI1_8 * (x1 - x17)
- smlal r10, r7, lr, r2 ; r10:r7 -= cPI3_8 * (x0 - x16)
- mov r6, r6, lsl #1 ; high word << 1
- mov r7, r7, lsl #1
- add r4, r4, r5 ; (x3 - x19) + (x2 - x18)
- rsb r5, r4, r5, lsl #1 ; (x3 - x19) - (x2 - x18)
- ldr r11, =cPI2_8 ; r11 = cPI2_8
- smull r10, r8, r4, r11 ; r8 = high word of cPI2_8 * sum
- smull r10, r9, r5, r11 ; r9 = high word of cPI2_8 * difference
- mov r8, r8, lsl #1
- mov r9, r9, lsl #1
- stmia r0!, {r6, r7, r8, r9} ; store new x0..x3; r0 = &x[4]
- ldmia r0, {r2, r3, r4, r5} ; r2..r5 = x4..x7
- ldmia r1, {r6, r7, r8, r9} ; r6..r9 = x20..x23
- add r6, r6, r2 ; y20 = x20 + x4
- rsb r2, r6, r2, lsl #1 ; x4 - x20
- add r7, r7, r3 ; y21 = x21 + x5
- rsb r3, r7, r3, lsl #1 ; x5 - x21
- add r8, r8, r4 ; y22 = x22 + x6
- sub r4, r8, r4, lsl #1 ; x22 - x6
- add r9, r9, r5 ; y23 = x23 + x7
- rsb r5, r9, r5, lsl #1 ; x7 - x23
- stmia r1!, {r6, r7, r8, r9} ; store y20..y23; r1 = &x[24]
- smull r10, r6, lr, r2 ; r10:r6 = cPI3_8 * (x4 - x20)
- rsb r2, r2, #0 ; r2 = -(x4 - x20)
- smlal r10, r6, r12, r3 ; r10:r6 += cPI1_8 * (x5 - x21)
- smull r10, r7, lr, r3 ; r10:r7 = cPI3_8 * (x5 - x21)
- smlal r10, r7, r12, r2 ; r10:r7 -= cPI1_8 * (x4 - x20)
- mov r6, r6, lsl #1
- mov r7, r7, lsl #1
- mov r8, r5 ; new x6 = x7 - x23 (stored unscaled)
- mov r9, r4 ; new x7 = x22 - x6 (stored unscaled)
- stmia r0!, {r6, r7, r8, r9} ; store new x4..x7; r0 = &x[8]
- ldmia r0, {r2, r3, r4, r5} ; r2..r5 = x8..x11
- ldmia r1, {r6, r7, r8, r9} ; r6..r9 = x24..x27
- add r6, r6, r2 ; y24 = x24 + x8
- sub r2, r6, r2, lsl #1 ; x24 - x8
- add r7, r7, r3 ; y25 = x25 + x9
- sub r3, r7, r3, lsl #1 ; x25 - x9
- add r8, r8, r4 ; y26 = x26 + x10
- sub r4, r8, r4, lsl #1 ; x26 - x10
- add r9, r9, r5 ; y27 = x27 + x11
- sub r5, r9, r5, lsl #1 ; x27 - x11
- stmia r1!, {r6, r7, r8, r9} ; store y24..y27; r1 = &x[28]
- smull r10, r7, lr, r3 ; r10:r7 = cPI3_8 * (x25 - x9)
- rsb r3, r3, #0 ; r3 = -(x25 - x9)
- smlal r10, r7, r12, r2 ; r10:r7 += cPI1_8 * (x24 - x8)
- smull r10, r6, r12, r3 ; r10:r6 = -cPI1_8 * (x25 - x9)
- smlal r10, r6, lr, r2 ; r10:r6 += cPI3_8 * (x24 - x8)
- mov r6, r6, lsl #1
- mov r7, r7, lsl #1
- sub r4, r4, r5 ; (x26 - x10) - (x27 - x11)
- add r5, r4, r5, lsl #1 ; (x26 - x10) + (x27 - x11)
- ldr r11, =cPI2_8 ; r11 = cPI2_8 (r11 has not been written since the earlier load, so this reloads the same value)
- smull r10, r8, r11, r4 ; r8 = high word of cPI2_8 * difference
- smull r10, r9, r11, r5 ; r9 = high word of cPI2_8 * sum
- mov r8, r8, lsl #1
- mov r9, r9, lsl #1
- stmia r0!, {r6, r7, r8, r9} ; store new x8..x11; r0 = &x[12]
- ldmia r0, {r2, r3, r4, r5} ; r2..r5 = x12..x15
- ldmia r1, {r6, r7, r8, r9} ; r6..r9 = x28..x31
- add r6, r6, r2 ; y28 = x28 + x12
- sub r2, r6, r2, lsl #1 ; x28 - x12
- add r7, r7, r3 ; y29 = x29 + x13
- sub r3, r7, r3, lsl #1 ; x29 - x13
- add r8, r8, r4 ; y30 = x30 + x14
- sub r4, r8, r4, lsl #1 ; x30 - x14
- add r9, r9, r5 ; y31 = x31 + x15
- sub r5, r9, r5, lsl #1 ; x31 - x15
- stmia r1, {r6, r7, r8, r9} ; store y28..y31 (no writeback)
- smull r10, r7, r12, r3 ; r10:r7 = cPI1_8 * (x29 - x13)
- rsb r3, r3, #0 ; r3 = -(x29 - x13)
- smlal r10, r7, lr, r2 ; r10:r7 += cPI3_8 * (x28 - x12)
- smull r10, r6, lr, r3 ; r10:r6 = -cPI3_8 * (x29 - x13)
- smlal r10, r6, r12, r2 ; r10:r6 += cPI1_8 * (x28 - x12)
- mov r6, r6, lsl #1
- mov r7, r7, lsl #1
- mov r8, r4 ; new x14 = x30 - x14 (stored unscaled)
- mov r9, r5 ; new x15 = x31 - x15 (stored unscaled)
- stmia r0, {r6, r7, r8, r9} ; store new x12..x15 (no writeback, r0 stays at &x[12])
- sub r0, r0, #12*4 ; r0 back to &x[0]
- str r0, [sp, #-4]! ; push x: mdct_butterfly_16 does not preserve r0
- bl mdct_butterfly_16 ; finish x[0..15]
- ldr r0, [sp], #4 ; pop x
- add r0, r0, #16*4 ; r0 = &x[16]
- bl mdct_butterfly_16 ; finish x[16..31]
- ldmia sp!, {r4-r11, pc} ; restore registers and return
- ENDP
; ----------------------------------------------------------------------
- ; mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop)
;   In:    r0 = x1, r1 = x2   data pointers; both are walked DOWNWARDS
;          r2 = T0            start of the coefficient table
;          r3 = step          coefficient stride in 32-bit words (used as r3, lsl #2)
;          [sp] at entry = Ttop (fifth argument, passed on the stack)
;   Saved: r4-r11, lr (restored on return)
;   Clobb: r0-r3, r12
;
; Every iteration of each of the four loops
;   - loads the four words just below r0 (a0..a3) and just below r1 (b0..b3),
;   - stores the sums a+b back below r0 (r0 -= 16),
;   - combines the differences with two coefficient pairs read from r2
;     (SMULL/SMLAL, high word << 1) and stores the four results below r1
;     (r1 -= 16),
;   - moves r2 by two strides.
; The loops differ in the sign of the differences, in which product goes
; to which word, and in the direction r2 moves:
;   label_1: r2 ascends  until r2 >= Ttop
;   label_2: r2 descends until r2 <= T0
;   label_3: r2 ascends  until r2 >= Ttop
;   label_4: r2 descends until r2 <= T0
; All four are test-at-the-bottom loops, so each body runs at least once.
;
; Stack frame: T0 is pushed together with the callee-saved registers so
; that it sits at [sp].  Nothing is kept below sp: that memory may be
; overwritten at any time by an interrupt, exception or signal frame.
; The 10-word frame is a multiple of 8 bytes.
;   [sp, #0]  = T0      [sp, #4..#36] = r4-r11, lr      [sp, #40] = Ttop
; ----------------------------------------------------------------------
- mdct_butterfly_generic_loop PROC
- EXPORT mdct_butterfly_generic_loop
- stmdb sp!, {r2, r4-r11, lr} ; save registers; r2 (T0) lands at [sp, #0]
- ldr r4, [sp, #40] ; r4 = Ttop (stack argument, above the 10 saved words)
; ---- loop 1: coefficients ascending, T0 -> Ttop ----------------------
- label_1
- ldmdb r0, {r6, r7, r8, r9} ; a0..a3 = four words below x1
- ldmdb r1, {r10, r11, r12, r14} ; b0..b3 = four words below x2
- add r6, r6, r10 ; a0 + b0
- sub r10, r6, r10, lsl #1 ; a0 - b0
- add r7, r7, r11 ; a1 + b1
- rsb r11, r7, r11, lsl #1 ; b1 - a1
- add r8, r8, r12 ; a2 + b2
- sub r12, r8, r12, lsl #1 ; a2 - b2
- add r9, r9, r14 ; a3 + b3
- rsb r14, r9, r14, lsl #1 ; b3 - a3
- stmdb r0!, {r6, r7, r8, r9} ; store the sums; x1 -= 4 words
- ldmia r2, {r6, r7} ; r6, r7 = coefficient pair at T
- smull r5, r8, r6, r14 ; r5 takes the unused low word throughout
- rsb r14, r14, #0
- smlal r5, r8, r7, r12
- smull r5, r9, r6, r12
- smlal r5, r9, r7, r14
- mov r8, r8, lsl #1
- mov r9, r9, lsl #1
- stmdb r1!, {r8, r9} ; overwrite the b2, b3 slots; x2 -= 2 words
- add r2, r2, r3, lsl #2 ; T += step
- ldmia r2, {r6, r7}
- smull r5, r8, r6, r11
- rsb r11, r11, #0
- smlal r5, r8, r7, r10
- smull r5, r9, r6, r10
- smlal r5, r9, r7, r11
- mov r8, r8, lsl #1
- mov r9, r9, lsl #1
- stmdb r1!, {r8, r9} ; overwrite the b0, b1 slots; x2 -= 2 words
- add r2, r2, r3, lsl #2 ; T += step
- cmp r2, r4
- blo label_1 ; repeat while T < Ttop (unsigned)
- ldr r4, [sp] ; r4 = T0 (saved on entry)
; ---- loop 2: coefficients descending, Ttop -> T0 ---------------------
- label_2
- ldmdb r0, {r6, r7, r8, r9}
- ldmdb r1, {r10, r11, r12, r14}
- add r6, r6, r10 ; a0 + b0
- sub r10, r6, r10, lsl #1 ; a0 - b0
- add r7, r7, r11 ; a1 + b1
- sub r11, r7, r11, lsl #1 ; a1 - b1
- add r8, r8, r12 ; a2 + b2
- sub r12, r8, r12, lsl #1 ; a2 - b2
- add r9, r9, r14 ; a3 + b3
- sub r14, r9, r14, lsl #1 ; a3 - b3
- stmdb r0!, {r6, r7, r8, r9}
- ldmia r2, {r6, r7}
- smull r5, r9, r6, r14
- rsb r14, r14, #0
- smlal r5, r9, r7, r12
- smull r5, r8, r6, r12
- smlal r5, r8, r7, r14
- mov r8, r8, lsl #1
- mov r9, r9, lsl #1
- stmdb r1!, {r8, r9}
- sub r2, r2, r3, lsl #2 ; T -= step
- ldmia r2, {r6, r7}
- smull r5, r9, r6, r11
- rsb r11, r11, #0
- smlal r5, r9, r7, r10
- smull r5, r8, r6, r10
- smlal r5, r8, r7, r11
- mov r8, r8, lsl #1
- mov r9, r9, lsl #1
- stmdb r1!, {r8, r9}
- sub r2, r2, r3, lsl #2 ; T -= step
- cmp r2, r4
- bhi label_2 ; repeat while T > T0 (unsigned)
- ldr r4, [sp, #40] ; r4 = Ttop again
; ---- loop 3: coefficients ascending, T0 -> Ttop ----------------------
- label_3
- ldmdb r0, {r6, r7, r8, r9}
- ldmdb r1, {r10, r11, r12, r14}
- add r6, r6, r10 ; a0 + b0
- rsb r10, r6, r10, lsl #1 ; b0 - a0
- add r7, r7, r11 ; a1 + b1
- rsb r11, r7, r11, lsl #1 ; b1 - a1
- add r8, r8, r12 ; a2 + b2
- rsb r12, r8, r12, lsl #1 ; b2 - a2
- add r9, r9, r14 ; a3 + b3
- rsb r14, r9, r14, lsl #1 ; b3 - a3
- stmdb r0!, {r6, r7, r8, r9}
- ldmia r2, {r6, r7}
- smull r5, r8, r6, r12
- rsb r12, r12, #0
- smlal r5, r8, r7, r14
- smull r5, r9, r6, r14
- smlal r5, r9, r7, r12
- mov r8, r8, lsl #1
- mov r9, r9, lsl #1
- stmdb r1!, {r8, r9}
- add r2, r2, r3, lsl #2 ; T += step
- ldmia r2, {r6, r7}
- smull r5, r8, r6, r10
- rsb r10, r10, #0
- smlal r5, r8, r7, r11
- smull r5, r9, r6, r11
- smlal r5, r9, r7, r10
- mov r8, r8, lsl #1
- mov r9, r9, lsl #1
- stmdb r1!, {r8, r9}
- add r2, r2, r3, lsl #2 ; T += step
- cmp r2, r4
- blo label_3 ; repeat while T < Ttop (unsigned)
- ldr r4, [sp] ; r4 = T0 again
; ---- loop 4: coefficients descending, Ttop -> T0 ---------------------
- label_4
- ldmdb r0, {r6, r7, r8, r9}
- ldmdb r1, {r10, r11, r12, r14}
- add r6, r6, r10 ; a0 + b0
- sub r10, r6, r10, lsl #1 ; a0 - b0
- add r7, r7, r11 ; a1 + b1
- rsb r11, r7, r11, lsl #1 ; b1 - a1
- add r8, r8, r12 ; a2 + b2
- sub r12, r8, r12, lsl #1 ; a2 - b2
- add r9, r9, r14 ; a3 + b3
- rsb r14, r9, r14, lsl #1 ; b3 - a3
- stmdb r0!, {r6, r7, r8, r9}
- ldmia r2, {r6, r7}
- smull r5, r9, r6, r12
- smlal r5, r9, r7, r14
- rsb r12, r12, #0
- smull r5, r8, r6, r14
- smlal r5, r8, r7, r12
- mov r8, r8, lsl #1
- mov r9, r9, lsl #1
- stmdb r1!, {r8, r9}
- sub r2, r2, r3, lsl #2 ; T -= step
- ldmia r2, {r6, r7}
- smull r5, r9, r6, r10
- rsb r10, r10, #0
- smlal r5, r9, r7, r11
- smull r5, r8, r6, r11
- smlal r5, r8, r7, r10
- mov r8, r8, lsl #1
- mov r9, r9, lsl #1
- stmdb r1!, {r8, r9}
- sub r2, r2, r3, lsl #2 ; T -= step
- cmp r2, r4
- bhi label_4 ; repeat while T > T0 (unsigned)
- add sp, sp, #4 ; drop the T0 slot
- ldmia sp!, {r4-r11, pc} ; restore registers and return
- ENDP
- END
|