216 lines
5.2 KiB
ArmAsm
216 lines
5.2 KiB
ArmAsm
@/******************************************************************************
|
|
@ *
|
|
@ * Copyright (C) 2015 The Android Open Source Project
|
|
@ *
|
|
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
|
@ * you may not use this file except in compliance with the License.
|
|
@ * You may obtain a copy of the License at:
|
|
@ *
|
|
@ * http://www.apache.org/licenses/LICENSE-2.0
|
|
@ *
|
|
@ * Unless required by applicable law or agreed to in writing, software
|
|
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
|
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@ * See the License for the specific language governing permissions and
|
|
@ * limitations under the License.
|
|
@ *
|
|
@ *****************************************************************************
|
|
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
|
@*/
|
|
|
|
@******************************************************************************
|
|
@*
|
|
@* @brief
|
|
@* This file contains definitions of routines for combing artifact check (CAC)
|
|
@*
|
|
@* @author
|
|
@* Ittiam
|
|
@*
|
|
@* @par List of Functions:
|
|
@* - ideint_cac_8x8_a9()
|
|
@*
|
|
@* @remarks
|
|
@* None
|
|
@*
|
|
@*******************************************************************************
|
|
|
|
|
|
@******************************************************************************
|
|
@*
|
|
@* @brief Calculates Combing Artifact
|
|
@*
|
|
@* @par Description
|
|
@* This function performs the combing artifact check (CAC) on the two given fields
|
|
@*
|
|
@* @param[in] pu1_top
|
|
@* UWORD8 pointer to top field
|
|
@*
|
|
@* @param[in] pu1_bot
|
|
@* UWORD8 pointer to bottom field
|
|
@*
|
|
@* @param[in] top_strd
|
|
@* Top field stride
|
|
@*
|
|
@* @param[in] bot_strd
|
|
@* Bottom field stride
|
|
@*
|
|
@* @returns
|
|
@* 1 (in r0) when the biased alt value is below the adj value for either sub-block, 0 otherwise
|
|
@*
|
|
@* @remarks
|
|
@*
|
|
@******************************************************************************
|
|
|
|
.global ideint_cac_8x8_a9
|
|
|
|
ideint_cac_8x8_a9:
|
|
|
|
stmfd sp!, {r4-r10, lr}
|
|
vpush {d9}
|
|
|
|
@ Load first row of top
|
|
vld1.u8 d28, [r0], r2
|
|
|
|
@ Load first row of bottom
|
|
vld1.u8 d29, [r1], r3
|
|
|
|
@ Load second row of top
|
|
vld1.u8 d30, [r0], r2
|
|
|
|
@ Load second row of bottom
|
|
vld1.u8 d31, [r1], r3
|
|
|
|
|
|
@ Calculate row based adj and alt values
|
|
@ Get row sums
|
|
vpaddl.u8 q0, q14
|
|
|
|
vpaddl.u8 q1, q15
|
|
|
|
vpaddl.u16 q0, q0
|
|
|
|
vpaddl.u16 q1, q1
|
|
|
|
@ Both q0 and q1 have four 32 bit sums corresponding to first 4 rows
|
|
@ Pack q0 and q1 into a single register (sum does not exceed 16bits)
|
|
|
|
vshl.u32 q8, q1, #16
|
|
vorr.u32 q8, q0, q8
|
|
@ q8 now contains 8 sums
|
|
|
|
@ Load third row of top
|
|
vld1.u8 d24, [r0], r2
|
|
|
|
@ Load third row of bottom
|
|
vld1.u8 d25, [r1], r3
|
|
|
|
@ Load fourth row of top
|
|
vld1.u8 d26, [r0], r2
|
|
|
|
@ Load fourth row of bottom
|
|
vld1.u8 d27, [r1], r3
|
|
|
|
@ Get row sums
|
|
vpaddl.u8 q2, q12
|
|
|
|
vpaddl.u8 q3, q13
|
|
|
|
vpaddl.u16 q2, q2
|
|
|
|
vpaddl.u16 q3, q3
|
|
@ Both q2 and q3 have four 32 bit sums corresponding to last 4 rows
|
|
@ Pack q2 and q3 into a single register (sum does not exceed 16bits)
|
|
|
|
vshl.u32 q9, q3, #16
|
|
vorr.u32 q9, q2, q9
|
|
@ q9 now contains 8 sums
|
|
|
|
@ Compute absolute diff between top and bottom row sums
|
|
vabd.u16 d16, d16, d17
|
|
vabd.u16 d17, d18, d19
|
|
|
|
@ RSUM_CSUM_THRESH
|
|
vmov.u16 q9, #20
|
|
|
|
@ Eliminate values smaller than RSUM_CSUM_THRESH
|
|
vcge.u16 q10, q8, q9
|
|
vand.u16 q10, q8, q10
|
|
@ q10 now contains 8 absolute diff of sums above the threshold
|
|
|
|
|
|
@ Compute adj
|
|
vadd.u16 d20, d20, d21
|
|
|
|
@ d20 has four adj values for two sub-blocks
|
|
|
|
@ Compute alt
|
|
vabd.u32 q0, q0, q1
|
|
vabd.u32 q2, q2, q3
|
|
|
|
vadd.u32 q0, q0, q2
|
|
vadd.u32 d21, d0, d1
|
|
@ d21 has two values for two sub-blocks
|
|
|
|
|
|
@ Calculate column based adj and alt values
|
|
|
|
vrhadd.u8 q0, q14, q15
|
|
vrhadd.u8 q1, q12, q13
|
|
vrhadd.u8 q0, q0, q1
|
|
|
|
vabd.u8 d0, d0, d1
|
|
|
|
@ RSUM_CSUM_THRESH >> 2
|
|
vmov.u8 d9, #5
|
|
|
|
@ Eliminate values smaller than RSUM_CSUM_THRESH >> 2
|
|
vcge.u8 d1, d0, d9
|
|
vand.u8 d0, d0, d1
|
|
@ d0 now contains 8 absolute diff of sums above the threshold
|
|
|
|
|
|
vpaddl.u8 d0, d0
|
|
vshl.u16 d0, d0, #2
|
|
|
|
@ Add row based adj
|
|
vadd.u16 d20, d0, d20
|
|
|
|
vpaddl.u16 d20, d20
|
|
@ d20 now contains 2 adj values
|
|
|
|
|
|
vrhadd.u8 d0, d28, d29
|
|
vrhadd.u8 d2, d24, d25
|
|
vrhadd.u8 d0, d0, d2
|
|
|
|
vrhadd.u8 d1, d30, d31
|
|
vrhadd.u8 d3, d26, d27
|
|
vrhadd.u8 d1, d1, d3
|
|
|
|
vabd.u8 d0, d0, d1
|
|
vpaddl.u8 d0, d0
|
|
|
|
vshl.u16 d0, d0, #2
|
|
vpaddl.u16 d0, d0
|
|
vadd.u32 d21, d0, d21
|
|
|
|
|
|
@ d21 now contains 2 alt values
|
|
|
|
@ SAD_BIAS_MULT_SHIFT
|
|
vshr.u32 d0, d21, #3
|
|
vadd.u32 d21, d21, d0
|
|
|
|
@ SAD_BIAS_ADDITIVE >> 1
|
|
vmov.u32 d0, #4
|
|
vadd.u32 d21, d21, d0
|
|
|
|
vclt.u32 d0, d21, d20
|
|
vpaddl.u32 d0, d0
|
|
|
|
vmov.u32 r0, d0[0]
|
|
cmp r0, #0
|
|
movne r0, #1
|
|
vpop {d9}
|
|
ldmfd sp!, {r4-r10, pc}
|