857 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			857 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| @/******************************************************************************
 | |
| @ *
 | |
| @ * Copyright (C) 2018 The Android Open Source Project
 | |
| @ *
 | |
| @ * Licensed under the Apache License, Version 2.0 (the "License");
 | |
| @ * you may not use this file except in compliance with the License.
 | |
| @ * You may obtain a copy of the License at:
 | |
| @ *
 | |
| @ * http://www.apache.org/licenses/LICENSE-2.0
 | |
| @ *
 | |
| @ * Unless required by applicable law or agreed to in writing, software
 | |
| @ * distributed under the License is distributed on an "AS IS" BASIS,
 | |
| @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| @ * See the License for the specific language governing permissions and
 | |
| @ * limitations under the License.
 | |
| @ *
 | |
| @ *****************************************************************************
 | |
| @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 | |
| @*/
 | |
| 
 | |
| 
 | |
| .text
 | |
| .p2align 2
 | |
| 
 | |
|     .global ixheaacd_sbr_imdct_using_fft
 | |
|     .type ixheaacd_sbr_imdct_using_fft, %function
 | |
| ixheaacd_sbr_imdct_using_fft:
 | |
| 
 | |
|     STMFD           sp!, {r4-r12, lr}
 | |
|     VPUSH           {D8 - D15}
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     LDR             r5, [sp, #0x68]
 | |
|     LDR             r6, [sp, #0x68+4]
 | |
|     LDR             r7, [sp, #0x68+8]
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| COND_6: CMP         r1, #0x10
 | |
|     BNE             COND_7
 | |
|     MOV             r8, #1
 | |
|     MOV             r4, r7
 | |
|     B               RADIX_4_FIRST_START
 | |
| 
 | |
| COND_7: CMP         r1, #0x20
 | |
| 
 | |
|     MOV             r8, #1
 | |
|     MOV             r4, r7
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| RADIX_8_FIRST_START:
 | |
| 
 | |
| 
 | |
|     LSR             r9 , r1, #5
 | |
|     LSL             r1, r1, #1
 | |
| 
 | |
| RADIX_8_FIRST_LOOP:
 | |
| 
 | |
|     MOV             r5 , r2
 | |
|     MOV             r6 , r2
 | |
|     MOV             r7 , r2
 | |
|     MOV             r11 , r2
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     LDRB            r12, [r4, #0]
 | |
|     ADD             r5, r5, r12, LSL #3
 | |
|     VLD2.32         {d0[0], d2[0]}, [r5] , r1
 | |
|     ADD             r5, r5, r1
 | |
|     VLD2.32         {d8[0], d10[0]}, [r5] , r1
 | |
|     SUB             r5, r5, r1, LSL #1
 | |
|     VLD2.32         {d4[0], d6[0]}, [r5] , r1
 | |
|     ADD             r5, r5, r1
 | |
|     VLD2.32         {d12[0], d14[0]}, [r5], r1
 | |
|     SUB             r5, r5, r1, LSL #2
 | |
| 
 | |
|     LDRB            r12, [r4, #1]
 | |
|     ADD             r6, r6, r12, LSL #3
 | |
|     VLD2.32         {d0[1], d2[1]}, [r6] , r1
 | |
|     ADD             r6, r6, r1
 | |
|     VLD2.32         {d8[1], d10[1]}, [r6] , r1
 | |
|     SUB             r6, r6, r1, LSL #1
 | |
|     VLD2.32         {d4[1], d6[1]}, [r6] , r1
 | |
|     ADD             r6, r6, r1
 | |
|     VLD2.32         {d12[1], d14[1]}, [r6], r1
 | |
|     SUB             r6, r6, r1, LSL #2
 | |
| 
 | |
| 
 | |
|     LDRB            r12, [r4, #2]
 | |
|     ADD             r7, r7, r12 , LSL #3
 | |
|     VLD2.32         {d1[0], d3[0]}, [r7] , r1
 | |
|     ADD             r7, r7, r1
 | |
|     VLD2.32         {d9[0], d11[0]}, [r7] , r1
 | |
|     SUB             r7, r7, r1, LSL #1
 | |
| 
 | |
|     LDRB            r12, [r4, #3]
 | |
|     ADD             r11, r11, r12 , LSL #3
 | |
|     VLD2.32         {d1[1], d3[1]}, [r11] , r1
 | |
|     ADD             r11, r11, r1
 | |
|     VLD2.32         {d9[1], d11[1]}, [r11] , r1
 | |
|     SUB             r11, r11, r1, LSL #1
 | |
| 
 | |
| 
 | |
| 
 | |
|     VADD.I32        q8, q0, q4
 | |
|     VLD2.32         {d5[0], d7[0]}, [r7] , r1
 | |
|     ADD             r7, r7, r1
 | |
| 
 | |
|     VSUB.I32        q9, q0, q4
 | |
|     VLD2.32         {d13[0], d15[0]}, [r7], r1
 | |
|     SUB             r7, r7, r1, LSL #2
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     VADD.I32        q0, q1, q5
 | |
|     VLD2.32         {d5[1], d7[1]}, [r11] , r1
 | |
|     ADD             r11, r11, r1
 | |
| 
 | |
|     VSUB.I32        q4, q1, q5
 | |
|     VLD2.32         {d13[1], d15[1]}, [r11], r1
 | |
|     SUB             r11, r11, r1, LSL #2
 | |
| 
 | |
| 
 | |
| 
 | |
|     ADD             r4, r4, #4
 | |
| 
 | |
|     ADD             r5, r5, r1, LSR #1
 | |
|     ADD             r6, r6, r1, LSR #1
 | |
|     ADD             r7, r7, r1, LSR #1
 | |
|     ADD             r11, r11, r1, LSR #1
 | |
| 
 | |
| 
 | |
|     VADD.I32        q1, q2, q6
 | |
|     VLD2.32         {d28[0], d30[0]}, [r5] , r1
 | |
| 
 | |
| 
 | |
|     VSUB.I32        q5, q2, q6
 | |
|     VLD2.32         {d20[0], d22[0]}, [r5] , r1
 | |
| 
 | |
| 
 | |
|     VADD.I32        q2, q3, q7
 | |
|     VLD2.32         {d24[0], d26[0]}, [r5] , r1
 | |
| 
 | |
| 
 | |
|     VSUB.I32        q6, q3, q7
 | |
|     VLD2.32         {d28[1], d30[1]}, [r6] , r1
 | |
| 
 | |
|     VADD.S32        q3, q9, q6
 | |
|     VLD2.32         {d20[1], d22[1]}, [r6] , r1
 | |
| 
 | |
|     VSUB.S32        q7, q9, q6
 | |
|     VLD2.32         {d24[1], d26[1]}, [r6] , r1
 | |
| 
 | |
|     VSUB.S32        q6, q4, q5
 | |
|     VLD2.32         {d29[0], d31[0]}, [r7] , r1
 | |
| 
 | |
|     VADD.S32        q9, q4, q5
 | |
|     VLD2.32         {d21[0], d23[0]}, [r7] , r1
 | |
| 
 | |
|     VADD.S32        q4, q8, q1
 | |
|     VLD2.32         {d25[0], d27[0]}, [r7] , r1
 | |
| 
 | |
|     VSUB.S32        q5, q8, q1
 | |
|     VLD2.32         {d29[1], d31[1]}, [r11] , r1
 | |
| 
 | |
|     VADD.S32        q8, q0, q2
 | |
|     VLD2.32         {d21[1], d23[1]}, [r11] , r1
 | |
| 
 | |
|     VSUB.S32        q0, q0, q2
 | |
|     VLD2.32         {d25[1], d27[1]}, [r11] , r1
 | |
| 
 | |
| 
 | |
|     VPUSH           {q3}
 | |
|     VPUSH           {q7}
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     VLD2.32         {d2[0], d4[0]}, [r5], r1
 | |
| 
 | |
|     VADD.I32        q7, q14, q12
 | |
| 
 | |
|     VLD2.32         {d2[1], d4[1]}, [r6] , r1
 | |
| 
 | |
|     VSUB.I32        q3, q14, q12
 | |
| 
 | |
|     VLD2.32         {d3[0], d5[0]}, [r7] , r1
 | |
| 
 | |
|     VADD.I32        q14, q15, q13
 | |
| 
 | |
|     VLD2.32         {d3[1], d5[1]}, [r11] , r1
 | |
| 
 | |
|     VSUB.I32        q12, q15, q13
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     VADD.I32        q15, q10, q1
 | |
|     VSUB.I32        q13, q10, q1
 | |
|     VADD.I32        q10, q11, q2
 | |
|     VSUB.I32        q1, q11, q2
 | |
| 
 | |
| 
 | |
| 
 | |
|     VADD.S32        q11, q7, q15
 | |
|     VSUB.S32        q2, q7, q15
 | |
|     VADD.S32        q7, q14, q10
 | |
|     VSUB.S32        q15, q14, q10
 | |
| 
 | |
|     VADD.S32        q14, q3, q12
 | |
|     VSUB.S32        q10, q3, q12
 | |
|     VADD.S32        q3, q13, q1
 | |
|     VSUB.S32        q12, q13, q1
 | |
| 
 | |
|     VADD.S32        q1 , q14, q12
 | |
|     VSUB.S32        q13, q14, q12
 | |
|     VSUB.S32        q12, q3, q10
 | |
| 
 | |
|     VUZP.16         d2, d3
 | |
|     VADD.S32        q14, q3, q10
 | |
| 
 | |
|     VUZP.16         d26, d27
 | |
|     VADD.S32        q3, q4, q11
 | |
| 
 | |
|     VUZP.16         d24, d25
 | |
|     VSUB.S32        q10, q4, q11
 | |
| 
 | |
|     VUZP.16         d28, d29
 | |
|     VADD.S32        q4, q8, q7
 | |
| 
 | |
|     MOVW            r14, #0x5a82
 | |
| 
 | |
|     VSUB.S32        q11, q8, q7
 | |
| 
 | |
|     VADD.S32        q8, q5, q15
 | |
|     VSUB.S32        q7, q5, q15
 | |
|     VSUB.S32        q5, q0, q2
 | |
|     VADD.S32        q15, q0, q2
 | |
| 
 | |
|     VPOP            {q0}
 | |
|     VPOP            {q2}
 | |
|     VPUSH           {q3-q4}
 | |
|     VPUSH           {q10}
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     VDUP.16         d20, r14
 | |
| 
 | |
| 
 | |
|     VMULL.u16       q4, d26, d20
 | |
|     VMULL.u16       q3, d28, d20
 | |
| 
 | |
|     VPUSH           {q7-q8}
 | |
|     VPUSH           {q5}
 | |
| 
 | |
|     VSHR.S32        q4, q4, #15
 | |
|     VSHR.S32        q3, q3, #15
 | |
| 
 | |
|     VQDMLAL.S16     q4, d27, d20
 | |
|     VQDMLAL.S16     q3, d29, d20
 | |
| 
 | |
| 
 | |
|     VPUSH           {q11}
 | |
| 
 | |
|     VMULL.u16       q13, d24, d20
 | |
|     VMULL.u16       q14, d2, d20
 | |
| 
 | |
|     VADD.S32        q5, q2, q4
 | |
|     VSUB.S32        q7, q2, q4
 | |
| 
 | |
|     VADD.S32        q8, q6, q3
 | |
|     VSUB.S32        q6, q6, q3
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     VSHR.S32        q13, q13, #15
 | |
|     VSHR.S32        q14, q14, #15
 | |
| 
 | |
|     VQDMLAL.S16     q13, d25, d20
 | |
|     VQDMLAL.S16     q14, d3, d20
 | |
| 
 | |
|     VPOP            {q1}
 | |
|     VPOP            {q10}
 | |
| 
 | |
|     VADD.S32        q2, q0, q13
 | |
|     VSUB.S32        q4, q0, q13
 | |
| 
 | |
|     VADD.S32        q11, q9, q14
 | |
|     VSUB.S32        q3, q9, q14
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     VPOP            {q14}
 | |
|     VPOP            {q9}
 | |
|     VPOP            {q0}
 | |
|     VPOP            {q12, q13}
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     VTRN.32         q12, q5
 | |
| 
 | |
|     VSHL.S32        q12, q12, #1
 | |
|     VTRN.32         q9, q2
 | |
|     VSHL.S32        q5, q5, #1
 | |
| 
 | |
|     VSHL.S32        q9, q9, #1
 | |
|     VTRN.32         q0, q7
 | |
|     VSHL.S32        q2, q2, #1
 | |
| 
 | |
|     VSHL.S32        q0, q0, #1
 | |
|     VTRN.32         q14, q4
 | |
|     VSHL.S32        q7, q7, #1
 | |
| 
 | |
|     VSHL.S32        q14, q14, #1
 | |
|     VTRN.32         q13, q6
 | |
|     VSHL.S32        q4, q4, #1
 | |
| 
 | |
|     VSHL.S32        q13, q13, #1
 | |
|     VTRN.32         q10, q3
 | |
|     VSHL.S32        q6, q6, #1
 | |
| 
 | |
|     VSHL.S32        q10, q10, #1
 | |
|     VTRN.32         q1, q8
 | |
|     VSHL.S32        q3, q3, #1
 | |
| 
 | |
|     VSHL.S32        q1, q1, #1
 | |
|     VTRN.32         q15, q11
 | |
|     VSHL.S32        q8, q8, #1
 | |
| 
 | |
|     VSHL.S32        q15, q15, #1
 | |
|     VSWP            d18, d25
 | |
| 
 | |
|     VSHL.S32        q11, q11, #1
 | |
|     VSWP            d4, d11
 | |
| 
 | |
|     VSWP            d1, d28
 | |
|     VSWP            d15, d8
 | |
| 
 | |
|     VSWP            d20, d27
 | |
|     VSWP            d6, d13
 | |
| 
 | |
|     VSWP            d30, d3
 | |
|     VSWP            d22, d17
 | |
| 
 | |
|     VST2.32         {q12, q13}, [r3]!
 | |
|     VST2.32         {q0, q1}, [r3]!
 | |
| 
 | |
|     VST2.32         {q5, q6}, [r3]!
 | |
|     VST2.32         {q7, q8}, [r3]!
 | |
| 
 | |
|     VMOV            q5, q11
 | |
| 
 | |
|     VST2.32         {q9, q10}, [r3]!
 | |
|     VST2.32         {q14, q15}, [r3]!
 | |
| 
 | |
|     VST2.32         {q2, q3}, [r3]!
 | |
|     VST2.32         {q4, q5}, [r3]!
 | |
| 
 | |
| 
 | |
|     SUBS            r9, r9, #1
 | |
|     BNE             RADIX_8_FIRST_LOOP
 | |
| 
 | |
|     LSR             r1, r1, #1
 | |
|     SUB             r3, r1, LSL #3
 | |
| 
 | |
|     MOV             r5, #8
 | |
|     MOV             r4, #32
 | |
|     LSR             r6, r1, #5
 | |
| 
 | |
|     B               RADIX_4_FIRST_ENDS
 | |
| 
 | |
| RADIX_8_FIRST_ENDS:
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| RADIX_4_FIRST_START:
 | |
| 
 | |
| 
 | |
|     LSR             r9 , r1, #4
 | |
|     LSL             r1, r1, #1
 | |
| 
 | |
| RADIX_4_LOOP:
 | |
| 
 | |
|     MOV             r5 , r2
 | |
|     MOV             r6 , r2
 | |
|     MOV             r7 , r2
 | |
|     MOV             r11 , r2
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     LDRB            r12, [r4, #0]
 | |
|     ADD             r5, r5, r12, LSL #3
 | |
| 
 | |
|     VLD2.32         {d0[0], d2[0]}, [r5] , r1
 | |
|     ADD             r5, r5, r1
 | |
|     VLD2.32         {d8[0], d10[0]}, [r5] , r1
 | |
|     SUB             r5, r5, r1, LSL #1
 | |
|     VLD2.32         {d4[0], d6[0]}, [r5] , r1
 | |
|     ADD             r5, r5, r1
 | |
|     VLD2.32         {d12[0], d14[0]}, [r5], r1
 | |
| 
 | |
|     LDRB            r12, [r4, #1]
 | |
|     ADD             r6, r6, r12, LSL #3
 | |
| 
 | |
|     VLD2.32         {d0[1], d2[1]}, [r6] , r1
 | |
|     ADD             r6, r6, r1
 | |
|     VLD2.32         {d8[1], d10[1]}, [r6] , r1
 | |
|     SUB             r6, r6, r1, LSL #1
 | |
|     VLD2.32         {d4[1], d6[1]}, [r6] , r1
 | |
|     ADD             r6, r6, r1
 | |
|     VLD2.32         {d12[1], d14[1]}, [r6], r1
 | |
| 
 | |
| 
 | |
|     LDRB            r12, [r4, #2]
 | |
|     ADD             r7, r7, r12, LSL #3
 | |
| 
 | |
|     VLD2.32         {d1[0], d3[0]}, [r7] , r1
 | |
|     ADD             r7, r7, r1
 | |
|     VLD2.32         {d9[0], d11[0]}, [r7] , r1
 | |
| 
 | |
|     LDRB            r12, [r4, #3]
 | |
|     ADD             r11, r11, r12 , LSL #3
 | |
| 
 | |
|     VLD2.32         {d1[1], d3[1]}, [r11] , r1
 | |
|     ADD             r11, r11, r1
 | |
|     VLD2.32         {d9[1], d11[1]}, [r11] , r1
 | |
| 
 | |
| 
 | |
|     SUB             r7, r7, r1, LSL #1
 | |
|     VADD.S32        q8, q0, q4
 | |
|     VLD2.32         {d5[0], d7[0]}, [r7] , r1
 | |
|     ADD             r7, r7, r1
 | |
|     VADD.S32        q9, q1, q5
 | |
|     VLD2.32         {d13[0], d15[0]}, [r7], r1
 | |
| 
 | |
| 
 | |
| 
 | |
|     SUB             r11, r11, r1, LSL #1
 | |
|     VSUB.S32        q10, q0, q4
 | |
|     VLD2.32         {d5[1], d7[1]}, [r11] , r1
 | |
|     ADD             r11, r11, r1
 | |
|     VSUB.S32        q11, q1, q5
 | |
|     VLD2.32         {d13[1], d15[1]}, [r11], r1
 | |
| 
 | |
| 
 | |
|     ADD             r4, r4, #4
 | |
| 
 | |
|     VADD.S32        q12, q2, q6
 | |
|     VADD.S32        q13, q3, q7
 | |
|     VSUB.S32        q14, q2, q6
 | |
|     VSUB.S32        q15, q3, q7
 | |
| 
 | |
|     VADD.S32        q0, q8, q12
 | |
|     VADD.S32        q1, q9, q13
 | |
|     VSUB.S32        q2, q8, q12
 | |
|     VSUB.S32        q3, q9, q13
 | |
| 
 | |
|     VADD.S32        q4, q10, q15
 | |
|     VSUB.S32        q5, q11, q14
 | |
|     VADD.S32        q7, q11, q14
 | |
|     VSUB.S32        q6, q10, q15
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     VTRN.32         q0, q4
 | |
| 
 | |
|     VSHL.S32        q0, q0, #1
 | |
|     VTRN.32         q2, q6
 | |
|     VSHL.S32        q4, q4, #1
 | |
| 
 | |
|     VSHL.S32        q2, q2, #1
 | |
|     VTRN.32         q1, q5
 | |
|     VSHL.S32        q6, q6, #1
 | |
| 
 | |
|     VSHL.S32        q1, q1, #1
 | |
|     VTRN.32         q3, q7
 | |
|     VSHL.S32        q5, q5, #1
 | |
| 
 | |
|     VSHL.S32        q3, q3, #1
 | |
|     VSWP            d4, d1
 | |
| 
 | |
|     VSHL.S32        q7, q7, #1
 | |
|     VSWP            d12, d9
 | |
| 
 | |
| 
 | |
| 
 | |
|     VSWP            d6, d3
 | |
|     VSWP            d14, d11
 | |
| 
 | |
| 
 | |
|     VST2.32         {q0, q1}, [r3]!
 | |
|     VST2.32         {q4, q5}, [r3]!
 | |
| 
 | |
|     VST2.32         {q2, q3}, [r3]!
 | |
|     VST2.32         {q6, q7}, [r3]!
 | |
| 
 | |
| 
 | |
| 
 | |
|     SUBS            r9, r9, #1
 | |
|     BNE             RADIX_4_LOOP
 | |
| 
 | |
|     LSR             r1, r1, #1
 | |
|     SUB             r3, r1, LSL #3
 | |
|     MOV             r5, #4
 | |
|     MOV             r4, #64
 | |
|     LSR             r6, r1, #4
 | |
| 
 | |
| 
 | |
| RADIX_4_FIRST_ENDS:
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     PUSH            {r3}
 | |
| 
 | |
|     LSR             r5, r5, #2
 | |
| 
 | |
| OUTER_LOOP_R4:
 | |
| 
 | |
|     LDR             r14, [sp]
 | |
| 
 | |
| 
 | |
|     MOV             r7, r5
 | |
|     MOV             r2, #0
 | |
|     MOV             r9, r0
 | |
|     LSL             r12 , r5, #5
 | |
| MIDDLE_LOOP_R4:
 | |
| 
 | |
| 
 | |
|     VLD2.16         {d0[0], d1[0]}, [r9], r2
 | |
|     VLD2.16         {d2[0], d3[0]}, [r9], r2
 | |
|     ADD             r11, r2, r4, LSL #2
 | |
|     VLD2.16         {d4[0], d5[0]}, [r9]
 | |
|     ADD             r10, r0, r11
 | |
| 
 | |
| 
 | |
|     VLD2.16         {d0[1], d1[1]}, [r10], r11
 | |
|     VLD2.16         {d2[1], d3[1]}, [r10], r11
 | |
|     ADD             r2, r11, r4, LSL #2
 | |
|     VLD2.16         {d4[1], d5[1]}, [r10]
 | |
|     ADD             r9, r0, r2
 | |
| 
 | |
| 
 | |
|     VLD2.16         {d0[2], d1[2]}, [r9], r2
 | |
|     VLD2.16         {d2[2], d3[2]}, [r9], r2
 | |
|     ADD             r11, r2, r4, LSL #2
 | |
|     VLD2.16         {d4[2], d5[2]}, [r9]
 | |
|     ADD             r10, r0, r11
 | |
| 
 | |
| 
 | |
| 
 | |
|     VLD2.16         {d0[3], d1[3]}, [r10], r11
 | |
|     VLD2.16         {d2[3], d3[3]}, [r10], r11
 | |
|     ADD             r2, r11, r4, LSL #2
 | |
|     VLD2.16         {d4[3], d5[3]}, [r10]
 | |
|     ADD             r9, r0, r2
 | |
| 
 | |
|     MOV             r10, r6
 | |
| 
 | |
| 
 | |
| 
 | |
| INNER_LOOP_R4:
 | |
| 
 | |
|     VLD2.32         {q3, q4}, [r14], r12
 | |
| 
 | |
|     VSHR.S32        q3, q3, #1
 | |
|     VLD4.16         {q5, q6}, [r14], r12
 | |
|     VSHR.S32        q4, q4, #1
 | |
| 
 | |
|     VSHR.U16        d10, d10, #1
 | |
|     VLD4.16         {q7, q8}, [r14], r12
 | |
|     VSHR.U16        d12, d12, #1
 | |
| 
 | |
|     VMULL.S16       q11, d10, d0
 | |
|     VMLSL.S16       q11, d12, d1
 | |
|     VLD4.16         {q9, q10}, [r14], r12
 | |
|     VMULL.S16       q12, d10, d1
 | |
|     VMLAL.S16       q12, d12, d0
 | |
| 
 | |
|     VSHR.U16        d14, d14, #1
 | |
|     VSHR.U16        d16, d16, #1
 | |
| 
 | |
|     SUB             r14, r14, r12, LSL #2
 | |
| 
 | |
|     VSHR.U16        d18, d18, #1
 | |
|     VSHR.U16        d20, d20, #1
 | |
| 
 | |
|     VMULL.S16       q13, d14, d2
 | |
|     VMLSL.S16       q13, d16, d3
 | |
| 
 | |
|     VSHR.S32        q11, q11, #15
 | |
| 
 | |
|     VMULL.S16       q14, d14, d3
 | |
|     VMLAL.S16       q14, d16, d2
 | |
| 
 | |
|     VMULL.S16       q15, d18, d4
 | |
|     VMLSL.S16       q15, d20, d5
 | |
| 
 | |
|     VMLAL.S16       q11, d11, d0
 | |
|     VMLSL.S16       q11, d13, d1
 | |
| 
 | |
|     VSHR.S32        q12, q12, #15
 | |
|     VSHR.S32        q13, q13, #15
 | |
|     VSHR.S32        q14, q14, #15
 | |
|     VSHR.S32        q15, q15, #15
 | |
| 
 | |
| 
 | |
|     VMLAL.S16       q12, d11, d1
 | |
|     VMLAL.S16       q12, d13, d0
 | |
| 
 | |
| 
 | |
|     VMULL.S16       q5, d18, d5
 | |
|     VMLAL.S16       q5, d20, d4
 | |
| 
 | |
| 
 | |
|     VMLAL.S16       q13, d15, d2
 | |
|     VMLSL.S16       q13, d17, d3
 | |
| 
 | |
|     VMLAL.S16       q14, d15, d3
 | |
|     VMLAL.S16       q14, d17, d2
 | |
| 
 | |
| 
 | |
|     VMLAL.S16       q15, d19, d4
 | |
|     VMLSL.S16       q15, d21, d5
 | |
| 
 | |
|     VSHR.S32        q5, q5, #15
 | |
| 
 | |
|     VMLAL.S16       q5, d19, d5
 | |
|     VMLAL.S16       q5, d21, d4
 | |
| 
 | |
| 
 | |
| 
 | |
|     CMP             r7, r5
 | |
|     BNE             BYPASS_IF
 | |
| 
 | |
|     ADD             r14, r14, r12
 | |
| 
 | |
|     LDR             r3, [r14], r12
 | |
|     ASR             r3, r3, #1
 | |
|     VMOV.S32        d22[0], r3
 | |
| 
 | |
|     LDR             r3, [r14], r12
 | |
|     ASR             r3, r3, #1
 | |
|     VMOV.S32        d26[0], r3
 | |
| 
 | |
|     LDR             r3, [r14]
 | |
|     ASR             r3, r3, #1
 | |
|     VMOV.S32        d30[0], r3
 | |
| 
 | |
|     SUB             r14, r14, r12, LSL #1
 | |
|     ADD             r14, r14, #4
 | |
| 
 | |
|     LDR             r3, [r14], r12
 | |
|     ASR             r3, r3, #1
 | |
|     VMOV.S32        d24[0], r3
 | |
| 
 | |
|     LDR             r3, [r14], r12
 | |
|     ASR             r3, r3, #1
 | |
|     VMOV.S32        d28[0], r3
 | |
| 
 | |
|     LDR             r3, [r14], r12
 | |
|     ASR             r3, r3, #1
 | |
|     VMOV.S32        d10[0], r3
 | |
| 
 | |
|     SUB             r14, r14, #4
 | |
| 
 | |
|     SUB             r14, r14, r12, LSL #2
 | |
| 
 | |
| BYPASS_IF:
 | |
| 
 | |
|     VADD.S32        q6, q3, q13
 | |
|     VADD.S32        q7, q4, q14
 | |
|     VSUB.S32        q3, q3, q13
 | |
|     VSUB.S32        q4, q4, q14
 | |
|     VADD.S32        q8, q11, q15
 | |
|     VADD.S32        q9, q12, q5
 | |
| 
 | |
|     VSUB.S32        q15, q11, q15
 | |
|     VSUB.S32        q14, q12, q5
 | |
| 
 | |
| 
 | |
|     VADD.S32        q10, q6, q8
 | |
|     VADD.S32        q11, q7, q9
 | |
|     VADD.S32        q12, q3, q14
 | |
|     VSUB.S32        q13, q4, q15
 | |
| 
 | |
|     VSUB.S32        q6, q6, q8
 | |
|     VST2.32         {q10, q11}, [r14], r12
 | |
|     VSUB.S32        q7, q7, q9
 | |
| 
 | |
|     VSUB.S32        q8, q3, q14
 | |
|     VST2.32         {q12, q13}, [r14], r12
 | |
|     VADD.S32        q9, q4, q15
 | |
| 
 | |
| 
 | |
|     VST2.32         {q6, q7}, [r14], r12
 | |
|     VST2.32         {q8, q9}, [r14], r12
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     SUBS            r10, r10, #1
 | |
|     BNE             INNER_LOOP_R4
 | |
| 
 | |
|     SUB             r14, r14, r1, LSL #3
 | |
|     ADD             r14, r14, #32
 | |
| 
 | |
|     SUBS            r7, r7, #1
 | |
|     BNE             MIDDLE_LOOP_R4
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|     LSR             r4, r4, #2
 | |
|     LSL             r5, r5, #2
 | |
|     LSR             r6, r6, #2
 | |
|     SUBS            r8, r8, #1
 | |
|     BNE             OUTER_LOOP_R4
 | |
| END_LOOPS:
 | |
|     POP             {r3}
 | |
|     VPOP            {D8 - D15}
 | |
|     LDMFD           sp!, {r4-r12, pc}
 | |
| 
 |