404 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			404 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
 | |
| 
 | |
| 
 | |
// push_v_regs / pop_v_regs
//
// Spill and reload the SIMD registers v8-v15 around a routine that overwrites
// them. The two macros must always be used as a matched pair: pop_v_regs
// reloads in exactly the reverse order of push_v_regs.
//
// Stack use: 4 x 32 = 128 bytes, so sp keeps its 16-byte alignment.
//
// The general-purpose registers x21-x24 are intentionally not saved: no
// instruction in this file reads or writes them, so they are preserved
// without any spill.
.macro push_v_regs
    stp             q8, q9, [sp, #-32]!
    stp             q10, q11, [sp, #-32]!
    stp             q12, q13, [sp, #-32]!
    stp             q14, q15, [sp, #-32]!
.endm

.macro pop_v_regs
    ldp             q14, q15, [sp], #32
    ldp             q12, q13, [sp], #32
    ldp             q10, q11, [sp], #32
    ldp             q8, q9, [sp], #32
.endm
 | |
| 
 | |
// swp reg1, reg2
//
// Exchange the contents of two 64-bit general-purpose registers.
// x16 is used as the temporary and is therefore clobbered; passing x16 as
// either operand does not produce a swap.
// This macro is not invoked anywhere in the visible part of this file.
.macro swp reg1, reg2
    mov             x16, \reg1          // tmp   = reg1
    mov             \reg1, \reg2        // reg1  = reg2
    mov             \reg2, x16          // reg2  = tmp
.endm
 | |
.text
.global ixheaacd_sbr_qmfsyn64_winadd

//------------------------------------------------------------------------------
// ixheaacd_sbr_qmfsyn64_winadd
//
// Produces 64 signed 16-bit output samples, four at a time. Each output is a
// 10-term multiply-accumulate over 16-bit inputs followed by a saturating
// shift and a narrowing to the upper 16 bits of the 32-bit accumulator.
//
// Inputs (register roles as used by the code below; the C prototype is not
// visible in this file):
//   x0  pointer to 16-bit data "A"; taps read 512 bytes apart
//   x1  pointer to 16-bit data "B"; reading starts 256 bytes in, taps are
//       512 bytes apart
//   x2  pointer to 16-bit coefficients; one tap chain starts at x2, a second
//       at x2 + 128 bytes; taps within a chain are 256 bytes apart
//   x3  pointer to 16-bit output
//   w4  shift count, applied per lane with SQSHL (signed, saturating)
//   x5  output stride in 16-bit elements (doubled to a byte stride below)
//
// For output index n = 0..63, in 16-bit element units:
//   acc  = rnd
//        + sum(k=0..4) A[n + 256*k]       * C[n + 128*k]
//        + sum(k=0..4) B[128 + n + 256*k] * C[64 + n + 128*k]
//   out[n * stride] = low16( sqshl(acc, shift) >> 16 )
// where rnd = 0x8000 shifted by SSHL with the negated shift count.
//
// Working registers:
//   x6        remaining-group counter (16 groups of 4 outputs in total)
//   x8 / x9   byte strides between taps: 512 (A/B data) and 256 (coefficients)
//   x10 / x11 scratch copies of a pointer pair, taken after the first
//             post-incremented load so the pair can be rewound to
//             "group start + 8 bytes" once the five taps have been walked
//   x12       second coefficient chain pointer (x2 + 128 bytes)
//   v20       rounding constant, v22 = shift count, v26 = accumulator
//   v28       finished group of four results awaiting store
//   v0..v19   tap operands (v0..v9 from x0/x2, v10..v19 from x1/x12)
//
// Clobbers: x0-x3, x5-x12, v0-v7, v16-v20, v22, v26, v28, v30, NZCV.
// v8-v15 are overwritten and restored through push_v_regs / pop_v_regs.
//
// The code is software-pipelined: loads for the next group are interleaved
// with the multiply-accumulates and stores of the current one, so the
// instruction order matters.
//------------------------------------------------------------------------------
ixheaacd_sbr_qmfsyn64_winadd:

    push_v_regs

    // ---- Setup, interleaved with the first loads of group 0 -----------------
    mov             w7, #0x8000
    ld1             {v0.4h}, [x0], #8
    mov             x12, x2                 // keep the unadvanced coefficient base

    dup             v30.4s, w7              // 0x8000 in every 32-bit lane
    ld1             {v1.4h}, [x2], #8
    dup             v22.4s, w4              // per-lane shift count

    mov             x10, x0                 // rewind point: group start + 8
    mov             x11, x2
    add             x0, x0, #504            // 8 + 504 = 512: second A tap
    add             x2, x2, #248            // 8 + 248 = 256: second C tap

    neg             v28.4s, v22.4s
    sshl            v20.4s, v30.4s, v28.4s  // rnd = 0x8000 shifted by -shift
    add             x12, x12, #128          // second coefficient chain
    add             x1, x1, #256            // B data is read from 256 bytes in
    mov             x6, #16                 // 16 groups of four outputs
    mov             x9, #256                // coefficient tap stride (bytes)
    mov             x8, #512                // data tap stride (bytes)

    // NOTE(review): the full 64-bit x5 is used as the store stride. If the C
    // prototype passes this argument as a 32-bit int, AAPCS64 leaves bits
    // 63:32 unspecified and a sign-extension (e.g. SBFIZ x5, x5, #1, #32)
    // would be needed here - confirm against the declaration.
    lsl             x5, x5, #1              // element stride -> byte stride
    ld1             {v2.4h}, [x0], x8
    mov             v26.16b, v20.16b        // acc = rnd

    // ---- Group 0: accumulate the x0/x2 chain --------------------------------
    smlal           v26.4s, v0.4h, v1.4h
    ld1             {v3.4h}, [x2], x9

    ld1             {v4.4h}, [x0], x8
    smlal           v26.4s, v2.4h, v3.4h

    ld1             {v5.4h}, [x2], x9

    ld1             {v6.4h}, [x0], x8
    smlal           v26.4s, v5.4h, v4.4h

    ld1             {v7.4h}, [x2], x9

    ld1             {v8.4h}, [x0], x8
    smlal           v26.4s, v7.4h, v6.4h

    ld1             {v9.4h}, [x2], x9
    mov             x0, x10                 // rewind A pointer

    mov             x2, x11                 // rewind C pointer
    ld1             {v10.4h}, [x1], #8
    smlal           v26.4s, v9.4h, v8.4h

    // ---- Group 0: accumulate the x1/x12 chain -------------------------------
    mov             x10, x1
    ld1             {v11.4h}, [x12], #8
    add             x1, x1, #504

    mov             x11, x12
    ld1             {v12.4h}, [x1], x8
    add             x12, x12, #248

    smlal           v26.4s, v10.4h, v11.4h
    ld1             {v13.4h}, [x12], x9

    ld1             {v14.4h}, [x1], x8
    smlal           v26.4s, v12.4h, v13.4h

    ld1             {v15.4h}, [x12], x9

    ld1             {v16.4h}, [x1], x8
    smlal           v26.4s, v15.4h, v14.4h

    ld1             {v17.4h}, [x12], x9

    ld1             {v18.4h}, [x1], x8
    smlal           v26.4s, v17.4h, v16.4h

    ld1             {v19.4h}, [x12], x9

    smlal           v26.4s, v19.4h, v18.4h

    // ---- Finish group 0 while loading every operand of group 1 --------------
    ld1             {v0.4h}, [x0], #8
    mov             x12, x11                // rewind second C chain

    mov             x1, x10                 // rewind B pointer
    ld1             {v1.4h}, [x2], #8
    mov             x10, x0

    sqshl           v26.4s, v26.4s, v22.4s  // saturating shift by w4

    add             x0, x0, #504

    mov             x11, x2
    ld1             {v2.4h}, [x0], x8
    add             x2, x2, #248

    sshr            v28.4s, v26.4s, #16     // keep the upper 16 bits
    ld1             {v3.4h}, [x2], x9

    uzp1            v28.8h, v28.8h, v28.8h  // pack the four results into h[0..3]
    mov             v26.16b, v20.16b        // acc = rnd for the next group

    ld1             {v4.4h}, [x0], x8
    ld1             {v5.4h}, [x2], x9

    ld1             {v6.4h}, [x0], x8
    ld1             {v7.4h}, [x2], x9

    ld1             {v8.4h}, [x0], x8
    ld1             {v9.4h}, [x2], x9
    mov             x0, x10

    mov             x2, x11
    ld1             {v10.4h}, [x1], #8

    mov             x10, x1
    ld1             {v11.4h}, [x12], #8
    add             x1, x1, #504

    mov             x11, x12
    ld1             {v12.4h}, [x1], x8
    add             x12, x12, #248

    ld1             {v13.4h}, [x12], x9

    ld1             {v14.4h}, [x1], x8
    ld1             {v15.4h}, [x12], x9

    ld1             {v16.4h}, [x1], x8
    ld1             {v17.4h}, [x12], x9

    ld1             {v18.4h}, [x1], x8
    sub             x6, x6, #2              // 14 left; the loop body runs 7 times
    ld1             {v19.4h}, [x12], x9
    mov             x1, x10

    mov             x12, x11

    // ---- Main loop: two groups (eight outputs) per iteration ----------------
    // On entry: v28 holds a finished group that has not been stored yet,
    // v0..v19 hold the operands of the next group, and v26 = rnd.
.Lqmfsyn64_winadd_loop:

    // First half: accumulate the preloaded group, store the pending results,
    // and start loading v0..v7 for the following group.
    smlal           v26.4s, v0.4h, v1.4h
    st1             {v28.h}[0], [x3], x5

    smlal           v26.4s, v2.4h, v3.4h
    ld1             {v0.4h}, [x0], #8
    smlal           v26.4s, v5.4h, v4.4h

    smlal           v26.4s, v7.4h, v6.4h
    st1             {v28.h}[1], [x3], x5

    mov             x10, x0
    ld1             {v1.4h}, [x2], #8
    add             x0, x0, #504

    smlal           v26.4s, v9.4h, v8.4h
    st1             {v28.h}[2], [x3], x5

    smlal           v26.4s, v10.4h, v11.4h
    st1             {v28.h}[3], [x3], x5

    mov             x11, x2
    ld1             {v2.4h}, [x0], x8
    add             x2, x2, #248

    smlal           v26.4s, v12.4h, v13.4h
    ld1             {v3.4h}, [x2], x9
    smlal           v26.4s, v15.4h, v14.4h

    smlal           v26.4s, v17.4h, v16.4h
    ld1             {v4.4h}, [x0], x8
    smlal           v26.4s, v19.4h, v18.4h

    ld1             {v5.4h}, [x2], x9

    ld1             {v6.4h}, [x0], x8
    sqshl           v26.4s, v26.4s, v22.4s

    sshr            v28.4s, v26.4s, #16
    ld1             {v7.4h}, [x2], x9
    mov             v26.16b, v20.16b

    uzp1            v28.8h, v28.8h, v28.8h

    // Second half: accumulate the group whose loads were started above,
    // fetching its remaining operands (v8..v19) as they are needed.
    smlal           v26.4s, v0.4h, v1.4h

    smlal           v26.4s, v2.4h, v3.4h
    ld1             {v8.4h}, [x0], x8
    smlal           v26.4s, v5.4h, v4.4h

    smlal           v26.4s, v7.4h, v6.4h
    ld1             {v9.4h}, [x2], x9

    ld1             {v10.4h}, [x1], #8
    smlal           v26.4s, v9.4h, v8.4h

    mov             x2, x11
    ld1             {v11.4h}, [x12], #8
    mov             x0, x10

    mov             x10, x1

    add             x1, x1, #504

    mov             x11, x12
    ld1             {v12.4h}, [x1], x8
    add             x12, x12, #248

    ld1             {v13.4h}, [x12], x9
    smlal           v26.4s, v10.4h, v11.4h

    ld1             {v14.4h}, [x1], x8
    smlal           v26.4s, v12.4h, v13.4h

    ld1             {v15.4h}, [x12], x9

    ld1             {v16.4h}, [x1], x8
    smlal           v26.4s, v15.4h, v14.4h

    ld1             {v17.4h}, [x12], x9

    ld1             {v18.4h}, [x1], x8
    smlal           v26.4s, v17.4h, v16.4h

    ld1             {v19.4h}, [x12], x9
    mov             x1, x10

    smlal           v26.4s, v19.4h, v18.4h
    st1             {v28.h}[0], [x3], x5    // results of the first half

    mov             x12, x11
    ld1             {v0.4h}, [x0], #8

    ld1             {v1.4h}, [x2], #8
    sqshl           v26.4s, v26.4s, v22.4s

    st1             {v28.h}[1], [x3], x5
    mov             x10, x0

    st1             {v28.h}[2], [x3], x5
    add             x0, x0, #504

    st1             {v28.h}[3], [x3], x5
    mov             x11, x2

    // v28 may be overwritten only now that all four lanes have been stored.
    sshr            v28.4s, v26.4s, #16
    ld1             {v2.4h}, [x0], x8
    add             x2, x2, #248

    // Preload every operand of the group handled by the next iteration
    // (or by the tail after the final iteration).
    ld1             {v3.4h}, [x2], x9
    ld1             {v4.4h}, [x0], x8
    ld1             {v5.4h}, [x2], x9
    ld1             {v6.4h}, [x0], x8
    ld1             {v7.4h}, [x2], x9
    ld1             {v8.4h}, [x0], x8
    ld1             {v9.4h}, [x2], x9

    uzp1            v28.8h, v28.8h, v28.8h
    mov             v26.16b, v20.16b

    mov             x0, x10
    ld1             {v10.4h}, [x1], #8
    mov             x2, x11

    mov             x10, x1
    ld1             {v11.4h}, [x12], #8
    add             x1, x1, #504

    mov             x11, x12
    ld1             {v12.4h}, [x1], x8
    add             x12, x12, #248

    ld1             {v13.4h}, [x12], x9

    ld1             {v14.4h}, [x1], x8
    ld1             {v15.4h}, [x12], x9

    ld1             {v16.4h}, [x1], x8
    ld1             {v17.4h}, [x12], x9

    subs            x6, x6, #2              // two groups consumed
    ld1             {v18.4h}, [x1], x8

    mov             x1, x10
    ld1             {v19.4h}, [x12], x9

    mov             x12, x11

    bgt             .Lqmfsyn64_winadd_loop

    // ---- Tail: last preloaded group, no further loads -----------------------
    smlal           v26.4s, v0.4h, v1.4h
    st1             {v28.h}[0], [x3], x5
    smlal           v26.4s, v2.4h, v3.4h

    smlal           v26.4s, v5.4h, v4.4h
    st1             {v28.h}[1], [x3], x5
    smlal           v26.4s, v7.4h, v6.4h

    smlal           v26.4s, v9.4h, v8.4h
    st1             {v28.h}[2], [x3], x5
    smlal           v26.4s, v10.4h, v11.4h

    smlal           v26.4s, v12.4h, v13.4h
    st1             {v28.h}[3], [x3], x5
    smlal           v26.4s, v15.4h, v14.4h

    smlal           v26.4s, v17.4h, v16.4h

    smlal           v26.4s, v19.4h, v18.4h

    sqshl           v26.4s, v26.4s, v22.4s

    sshr            v28.4s, v26.4s, #16

    uzp1            v28.8h, v28.8h, v28.8h

    st1             {v28.h}[0], [x3], x5
    st1             {v28.h}[1], [x3], x5
    st1             {v28.h}[2], [x3], x5
    st1             {v28.h}[3], [x3], x5

    pop_v_regs
    ret
 | |
| 
 |