342 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			342 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
// push_v_regs
// Spills d8-d15, x8-x17 and x29/x30 into a single 160-byte stack frame.
// After expansion sp is 160 bytes lower and the slots are laid out as:
//   [sp, #0]   x29, x30        [sp, #96]   d14, d15
//   [sp, #16]  x16, x17        [sp, #112]  d12, d13
//   [sp, #32]  x14, x15        [sp, #128]  d10, d11
//   [sp, #48]  x12, x13        [sp, #144]  d8,  d9
//   [sp, #64]  x10, x11
//   [sp, #80]  x8,  x9
// Must be paired with pop_v_regs, which expects exactly this layout.
.macro push_v_regs
    stp             x29, x30, [sp, #-160]!  // reserve the whole frame at once
    stp             x16, x17, [sp, #16]
    stp             x14, x15, [sp, #32]
    stp             x12, x13, [sp, #48]
    stp             x10, x11, [sp, #64]
    stp             x8, x9, [sp, #80]
    stp             d14, d15, [sp, #96]
    stp             d12, d13, [sp, #112]
    stp             d10, d11, [sp, #128]
    stp             d8, d9, [sp, #144]
.endm
 | |
// pop_v_regs
// Reloads the registers spilled by push_v_regs and releases its 160-byte
// frame. Expects sp to point at the x29/x30 slot, with x16/x17 .. x8/x9 in
// the next five 16-byte slots and d14/d15 .. d8/d9 in the four after that.
.macro pop_v_regs
    ldp             d8, d9, [sp, #144]
    ldp             d10, d11, [sp, #128]
    ldp             d12, d13, [sp, #112]
    ldp             d14, d15, [sp, #96]
    ldp             x8, x9, [sp, #80]
    ldp             x10, x11, [sp, #64]
    ldp             x12, x13, [sp, #48]
    ldp             x14, x15, [sp, #32]
    ldp             x16, x17, [sp, #16]
    ldp             x29, x30, [sp], #160    // last load also frees the frame
.endm
 | |
| 
 | |
.text
.p2align 2
    .global ixheaacd_sbr_qmfanal32_winadds

//------------------------------------------------------------------------------
// ixheaacd_sbr_qmfanal32_winadds
//
// Syntax/ABI: GNU as, AArch64, AAPCS64. Leaf routine (makes no calls).
//
// Inputs (the names for x4-x7 are taken from the stack-argument comments of
// the 32-bit ARM routine this was ported from; x0-x3 are described by how the
// code below uses them):
//   x0  pointer to 16-bit samples, set A. Each output group of 4 reads
//       4 halfwords at byte offsets 0, 128, 256, 384 and 512 from the
//       current position; the position advances 8 bytes per group.
//   x1  pointer to 16-bit samples, set B. Same access pattern as x0.
//   x2  pointer to 16-bit coefficients paired with set A. Each group reads
//       16 bytes at byte offsets 0, 256, 512, 768 and 1024 and keeps only
//       the even-indexed halfwords (LD2 de-interleave); the position advances
//       16 bytes per group.
//   x3  pointer to 16-bit coefficients paired with set B. Same pattern as x2.
//   x4  winAdd: 32-bit output. 32 words of set-A sums are written at
//       [x4, x4+128) and 32 words of set-B sums at [x4+128, x4+256).
//   x5  filterStates: 16-bit buffer; halfwords [0..31] are overwritten.
//   x6  timeIn: 16-bit input read at a stride of `stride` halfwords.
//   w7  stride, in halfwords. Treated as a signed 32-bit value, matching the
//       `sxtw` in the original port comments -- TODO confirm against the C
//       prototype.
//
// Effect:
//   1. filterStates[31 - i] = timeIn[i * stride]            for i = 0..31
//   2. winAdd[n]      = sum over 5 taps of sampleA * coeffA  for n = 0..31
//      winAdd[32 + n] = sum over 5 taps of sampleB * coeffB  for n = 0..31
//      (products are signed 16x16 -> 32-bit, accumulated in 32-bit lanes)
//
// Clobbers: x0-x12, v0-v7, v16-v20, v30, NZCV flags. x0-x3 are left advanced
//           by the bytes consumed. d8-d15, x19/x20 and x29/x30 are preserved
//           through push_v_regs / pop_v_regs and the x19/x20 spill below.
// Stack:    160 bytes (push_v_regs) + 16 bytes (x19/x20); sp stays 16-aligned.
//------------------------------------------------------------------------------
ixheaacd_sbr_qmfanal32_winadds:
    push_v_regs
    stp             x19, x20, [sp, #-16]!

    // x9 = byte step between consecutive timeIn samples (stride * 2).
    // Only w7 carries the argument; AAPCS64 leaves the upper 32 bits of x7
    // unspecified, so sign-extend before scaling instead of shifting x7.
    sxtw            x9, w7
    lsl             x9, x9, #1

    mov             x20, x4                 // keep winAdd: w4 is scratch below
    add             x5, x5, #64             // x5 = &filterStates[32]
    mov             w10, #3                 // 4 passes (3..0) of 8 samples each

    // --- Part 1: copy 32 strided timeIn samples into filterStates, reversed.
.Lqmfanal32_winadds_copy:
    ldrsh           w4, [x6]
    add             x6, x6, x9
    ldrsh           w8, [x6]
    add             x6, x6, x9
    ldrsh           w11, [x6]
    add             x6, x6, x9
    ldrsh           w12, [x6]
    add             x6, x6, x9

    strh            w4, [x5, #-2]!          // pre-decrement: fills downwards
    strh            w8, [x5, #-2]!
    strh            w11, [x5, #-2]!
    strh            w12, [x5, #-2]!

    ldrsh           w4, [x6]
    add             x6, x6, x9
    ldrsh           w8, [x6]
    add             x6, x6, x9
    ldrsh           w11, [x6]
    add             x6, x6, x9
    ldrsh           w12, [x6]
    add             x6, x6, x9

    strh            w4, [x5, #-2]!
    strh            w8, [x5, #-2]!
    strh            w11, [x5, #-2]!
    strh            w12, [x5, #-2]!
    subs            w10, w10, #1

    bpl             .Lqmfanal32_winadds_copy    // loop while counter >= 0

    // --- Part 2: windowed multiply-accumulate, software pipelined.
    // Register roles from here on:
    //   x4  set-A output cursor        x11 set-B output cursor (x4 + 128)
    //   x5  groups remaining           x6  128-byte sample tap step
    //   x9  saved sample cursor        x10 saved coefficient cursor
    //   v0,v2,v4,v6,v8      set-A sample taps
    //   v1,v3,v5,v7,v9      set-A coefficients (even halfwords of each LD2)
    //   v10,v12,v14,v16,v18 set-B sample taps
    //   v11,v13,v15,v17,v19 set-B coefficients
    //   v30                 32-bit accumulator
    // Each LD2 also fills the next-numbered register with the odd halfwords;
    // that register is then overwritten by a sample load and never used as
    // a coefficient.
    //
    // Prologue: accumulate set-A group 0 and preload set-B group 0 operands.
    mov             x4, x20                 // restore winAdd
    mov             x5, #8                  // 8 groups of 4 outputs per set
    ld1             {v0.4h}, [x0], #8
    mov             x6, #128

    ld2             {v1.4h, v2.4h}, [x2], #16

    mov             x9, x0                  // x9 = start of next set-A group
    add             x0, x0, #120            // x0 = second tap (group start + 128)

    mov             x11, x4
    ld1             {v2.4h}, [x0], x6
    add             x11, x11, #128          // set-B results follow 32 set-A words

    mov             x10, x2                 // x10 = start of next coeff-A group
    add             x2, x2, #240            // x2 = second row (group start + 256)

    smull           v30.4s, v0.4h, v1.4h
    ld2             {v3.4h, v4.4h}, [x2], #16
    add             x2, x2, #240

    ld1             {v4.4h}, [x0], x6
    smlal           v30.4s, v2.4h, v3.4h

    ld2             {v5.4h, v6.4h}, [x2], #16

    add             x2, x2, #240
    ld1             {v6.4h}, [x0], x6
    smlal           v30.4s, v4.4h, v5.4h

    ld2             {v7.4h, v8.4h}, [x2], #16

    add             x2, x2, #240
    ld1             {v8.4h}, [x0], x6
    smlal           v30.4s, v6.4h, v7.4h

    mov             x0, x9                  // rewind to next set-A group
    ld2             {v9.4h, v10.4h}, [x2], #16

    ld1             {v10.4h}, [x1], #8
    smlal           v30.4s, v8.4h, v9.4h    // v30 = set-A group 0, complete

    mov             x9, x1                  // x9 = start of next set-B group
    ld2             {v11.4h, v12.4h}, [x3], #16
    add             x1, x1, #120

    mov             x2, x10                 // rewind to next coeff-A group
    ld1             {v12.4h}, [x1], x6
    mov             x10, x3                 // x10 = start of next coeff-B group

    add             x3, x3, #240
    ld2             {v13.4h, v14.4h}, [x3], #16
    add             x3, x3, #240

    ld2             {v15.4h, v16.4h}, [x3], #16

    ld1             {v14.4h}, [x1], x6
    add             x3, x3, #240

    ld1             {v16.4h}, [x1], x6
    sub             x5, x5, #1              // group 0 is already in flight

    ld2             {v17.4h, v18.4h}, [x3], #16

    add             x3, x3, #240
    ld1             {v18.4h}, [x1], x6

    mov             x1, x9                  // rewind to next set-B group
    ld2             {v19.4h, v20.4h}, [x3], #16

    mov             x3, x10                 // rewind to next coeff-B group

    // Steady state (7 passes). On entry v30 holds the finished set-A sum of
    // the previous group and v10..v19 hold that group's set-B operands.
.Lqmfanal32_winadds_mac:
    ld1             {v0.4h}, [x0], #8

    mov             x9, x0
    ld2             {v1.4h, v2.4h}, [x2], #16
    add             x0, x0, #120

    mov             x10, x2
    st1             {v30.4s}, [x4], #16     // emit previous set-A group
    add             x2, x2, #240

    smull           v30.4s, v10.4h, v11.4h  // previous group's set-B sum
    ld1             {v2.4h}, [x0], x6
    smlal           v30.4s, v12.4h, v13.4h

    smlal           v30.4s, v14.4h, v15.4h
    ld2             {v3.4h, v4.4h}, [x2], #16
    smlal           v30.4s, v16.4h, v17.4h

    smlal           v30.4s, v18.4h, v19.4h
    ld1             {v4.4h}, [x0], x6
    add             x2, x2, #240

    st1             {v30.4s}, [x11], #16    // emit previous set-B group

    smull           v30.4s, v0.4h, v1.4h    // start this group's set-A sum
    ld2             {v5.4h, v6.4h}, [x2], #16
    smlal           v30.4s, v2.4h, v3.4h

    add             x2, x2, #240
    ld1             {v6.4h}, [x0], x6
    smlal           v30.4s, v4.4h, v5.4h

    ld2             {v7.4h, v8.4h}, [x2], #16

    add             x2, x2, #240
    ld1             {v8.4h}, [x0], x6
    smlal           v30.4s, v6.4h, v7.4h

    mov             x0, x9
    ld2             {v9.4h, v10.4h}, [x2], #16

    ld1             {v10.4h}, [x1], #8
    mov             x2, x10

    mov             x9, x1
    ld2             {v11.4h, v12.4h}, [x3], #16
    add             x1, x1, #120

    smlal           v30.4s, v8.4h, v9.4h    // this group's set-A sum complete
    ld1             {v12.4h}, [x1], x6
    mov             x10, x3

    add             x3, x3, #240
    ld2             {v13.4h, v14.4h}, [x3], #16
    add             x3, x3, #240

    ld1             {v14.4h}, [x1], x6
    ld2             {v15.4h, v16.4h}, [x3], #16
    add             x3, x3, #240

    ld1             {v16.4h}, [x1], x6
    ld2             {v17.4h, v18.4h}, [x3], #16
    add             x3, x3, #240

    ld1             {v18.4h}, [x1], x6
    subs            x5, x5, #1              // flags consumed by the bgt below

    mov             x1, x9
    ld2             {v19.4h, v20.4h}, [x3], #16

    mov             x3, x10

    bgt             .Lqmfanal32_winadds_mac

    // Drain: emit the last set-A group, then compute and emit the last
    // set-B group from the operands loaded in the final loop pass.
    st1             {v30.4s}, [x4], #16
    smull           v30.4s, v10.4h, v11.4h
    smlal           v30.4s, v12.4h, v13.4h

    smlal           v30.4s, v14.4h, v15.4h
    smlal           v30.4s, v16.4h, v17.4h
    smlal           v30.4s, v18.4h, v19.4h

    st1             {v30.4s}, [x11], #16

    ldp             x19, x20, [sp], #16
    pop_v_regs
    ret
 |