124 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			124 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| ///******************************************************************************
 | |
| // *
 | |
| // * Copyright (C) 2018 The Android Open Source Project
 | |
| // *
 | |
| // * Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // * you may not use this file except in compliance with the License.
 | |
| // * You may obtain a copy of the License at:
 | |
| // *
 | |
| // * http://www.apache.org/licenses/LICENSE-2.0
 | |
| // *
 | |
| // * Unless required by applicable law or agreed to in writing, software
 | |
| // * distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // * See the License for the specific language governing permissions and
 | |
| // * limitations under the License.
 | |
| // *
 | |
| // *****************************************************************************
 | |
| // * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 | |
| //*/
 | |
| // push_v_regs: store q8-q15, then X8-X17 and X29/X30, below sp using pre-indexed STP.
 | |
| // sp ends 224 bytes lower (4 x 32 + 6 x 16); pop_v_regs reloads the pairs in the reverse order.
 | |
| .macro push_v_regs
 | |
|     stp             q8, q9, [sp, #-32]!         // whole 128-bit Q registers: 32 bytes per pair
 | |
|     stp             q10, q11, [sp, #-32]!
 | |
|     stp             q12, q13, [sp, #-32]!
 | |
|     stp             q14, q15, [sp, #-32]!
 | |
|     stp             X8, X9, [sp, #-16]!         // 64-bit general registers: 16 bytes per pair
 | |
|     stp             X10, X11, [sp, #-16]!
 | |
|     stp             X12, X13, [sp, #-16]!
 | |
|     stp             X14, X15, [sp, #-16]!
 | |
|     stp             X16, X17, [sp, #-16]!
 | |
|     stp             X29, X30, [sp, #-16]!       // pushed last, so this pair sits at the final sp
 | |
| .endm
 | |
| .macro pop_v_regs
 | |
|     ldp             X29, X30, [sp], #16         // mirror of push_v_regs: the pair stored last is reloaded first
 | |
|     ldp             X16, X17, [sp], #16         // post-indexed loads move sp back up 16 bytes per X pair
 | |
|     ldp             X14, X15, [sp], #16
 | |
|     ldp             X12, X13, [sp], #16
 | |
|     ldp             X10, X11, [sp], #16
 | |
|     ldp             X8, X9, [sp], #16
 | |
|     ldp             q14, q15, [sp], #32         // Q pairs occupy 32 bytes each
 | |
|     ldp             q12, q13, [sp], #32
 | |
|     ldp             q10, q11, [sp], #32
 | |
|     ldp             q8, q9, [sp], #32           // after this load sp is 224 bytes above where the macro started
 | |
| .endm
 | |
| .text
 | |
| .global ixheaacd_neg_shift_spec_armv8
 | |
| ixheaacd_neg_shift_spec_armv8:                  // reads 448 32-bit words at X0 from the last one down to the first; writes 448 halfwords through X1; W2 = shift count, X3 = output stride in halfwords
 | |
|     push_v_regs                                 // NOTE(review): the body writes only X1, X4-X8 and V0/V6/V20-V25/V27/V29-V31 - confirm this save/restore is still required
 | |
|     MOV             X5, #448                    // X5 = number of 32-bit input elements (fixed at 448)
 | |
|     SUB             X6, X5, #1                  // X6 = 447, index of the last element
 | |
|     LSL             X6, X6, #2                  // ... scaled to a byte offset (4 bytes per word)
 | |
|     ADD             X6, X6, X0                  // X6 = address of input element 447
 | |
|     MOV             X8, #-16                    // post-index step: each LD1 walks the input back by 16 bytes
 | |
|     SUB             X6, X6, #12                 // back up so a 4-word load covers elements 444..447
 | |
|     LSL             X7, X3, #1                  // X7 = X3 * 2: byte step applied to X1 after every halfword store; NOTE(review): uses all 64 bits of X3 - confirm the caller passes a full-width value
 | |
|     DUP             V31.4S, W2                  // V31 = W2 in every lane: shift operand for the SQSHL instructions
 | |
|     MOV             W4, #0x8000
 | |
|     DUP             V30.4S, W4                  // V30 = 0x8000 in every lane: rounding term added before the >> 16
 | |
| // Pipeline prologue: load the first two vectors and finish the first one so V27 is ready to store.
 | |
|     LD1             {V0.4S}, [X6], X8           // V0 = elements 444..447, then X6 -= 16
 | |
|     SQNEG           V0.4S, V0.4S                // saturating negate
 | |
| 
 | |
|     LD1             {V6.4S}, [X6], X8           // second vector, kept in flight while the first is finished
 | |
|     SQSHL           V25.4S, V0.4S, V31.4S       // saturating shift by the per-lane count in V31
 | |
|     SQADD           V24.4S, V25.4S, V30.4S      // saturating add of the 0x8000 rounding term
 | |
|     SSHR            V23.4S, V24.4S, #16         // arithmetic >> 16: rounded upper half now in the low 16 bits of each lane
 | |
|     REV64           V23.4S, V23.4S              // swap the two words in each 64-bit half: lanes 0,1,2,3 -> 1,0,3,2
 | |
|     SUB             X5, X5, #8                  // the 8 elements loaded above are accounted for; 440 left for the loop
 | |
| 
 | |
|     UZP1            V27.8H, V23.8H, V23.8H      // V27.H[0..3] = low halfword of V23.S[0..3]
 | |
|     SQNEG           V29.4S, V6.4S               // negated second vector waits in V29
 | |
| // Main loop: 55 passes (X5 goes 440 -> 0 in steps of 8); each pass stores 8 halfwords and loads 8 words.
 | |
| LOOP_1:
 | |
| // First half: store the vector held in V27 while the V29 vector is shifted, rounded and narrowed.
 | |
|     ST1             {V27.H}[2], [X1], X7        // store order H[2],H[3],H[0],H[1] = loaded lanes 3,2,1,0 (highest input address first)
 | |
|     SQSHL           V22.4S, V29.4S, V31.4S
 | |
|     LD1             {V0.4S}, [X6], X8           // fetch the next vector for the second half
 | |
|     ST1             {V27.H}[3], [X1], X7
 | |
|     SQADD           V21.4S, V22.4S, V30.4S
 | |
|     ST1             {V27.H}[0], [X1], X7
 | |
|     SQNEG           V0.4S, V0.4S
 | |
|     ST1             {V27.H}[1], [X1], X7
 | |
|     SSHR            V20.4S, V21.4S, #16
 | |
|     REV64           V20.4S, V20.4S
 | |
|     SUBS            X5, X5, #8                  // sets the flags consumed by BGT at the bottom of the loop
 | |
| 
 | |
| // Second half: V27 is overwritten only after its four stores above; it now carries the V29 result.
 | |
|     UZP1            V27.8H, V20.8H, V20.8H
 | |
|     SQSHL           V25.4S, V0.4S, V31.4S
 | |
|     ST1             {V27.H}[2], [X1], X7
 | |
|     LD1             {V6.4S}, [X6], X8           // fetch the vector for the next pass (or for the drain code after the loop)
 | |
|     SQADD           V24.4S, V25.4S, V30.4S
 | |
|     ST1             {V27.H}[3], [X1], X7
 | |
|     SSHR            V23.4S, V24.4S, #16
 | |
|     ST1             {V27.H}[0], [X1], X7
 | |
|     REV64           V23.4S, V23.4S
 | |
|     ST1             {V27.H}[1], [X1], X7
 | |
| 
 | |
| 
 | |
|     UZP1            V27.8H, V23.8H, V23.8H      // V27 = narrowed V0 result, stored at the top of the next pass
 | |
|     SQNEG           V29.4S, V6.4S
 | |
| // No instruction between the SUBS above and this branch writes NZCV, so BGT tests the updated X5.
 | |
|     BGT             LOOP_1
 | |
| // Pipeline drain: store the vector left in V27, then finish and store the one left in V29.
 | |
|     ST1             {V27.H}[2], [X1], X7
 | |
|     SQSHL           V22.4S, V29.4S, V31.4S
 | |
|     ST1             {V27.H}[3], [X1], X7
 | |
|     ST1             {V27.H}[0], [X1], X7
 | |
|     SQADD           V21.4S, V22.4S, V30.4S
 | |
|     ST1             {V27.H}[1], [X1], X7
 | |
|     SSHR            V20.4S, V21.4S, #16
 | |
| 
 | |
|     REV64           V20.4S, V20.4S
 | |
| 
 | |
|     UZP1            V27.8H, V20.8H, V20.8H
 | |
| 
 | |
|     ST1             {V27.H}[2], [X1], X7        // last four outputs come from input elements 3,2,1,0
 | |
|     ST1             {V27.H}[3], [X1], X7
 | |
|     ST1             {V27.H}[0], [X1], X7
 | |
|     ST1             {V27.H}[1], [X1], X7
 | |
|     pop_v_regs                                  // restores everything push_v_regs saved and returns sp to its entry value
 | |
|     RET
 |