@ android13/hardware/rockchip/hwcomposer/einkhwc/rgb888_to_gray_256_neon.s
@ (file metadata, kept as comments: 1079 lines, 20 KiB, ArmAsm)

.text
.global neon_rgb888_to_gray16ARM_32
@-----------------------------------------------------------------------
@ neon_rgb888_to_gray16ARM_32(u8 *dest, u8 *src, int h, int w, int vir_width)
@ RGBX8888 -> packed 4-bit grayscale ("gray16"), 32 pixels per loop.
@   gray4 = (11*R + 16*G + 5*B) >> 9
@ Weights sum to 32, so >>5 gives an 8-bit gray and the extra >>4 keeps
@ only the top nibble. Two pixels per output byte, first pixel in the
@ low nibble. Assumes w and vir_width are multiples of 32.
@ In:  r0 = dest, r1 = src, r2 = h, r3 = w, [sp] = vir_width (src stride
@      in pixels)
@ NOTE(review): clobbers d8-d18 without saving; AAPCS marks d8-d15 as
@ callee-saved -- confirm all callers tolerate this.
@-----------------------------------------------------------------------
neon_rgb888_to_gray16ARM_32:
@ r0 = dest
@ r1 = src
@ r2 = h
@ r3 = w
mov r12,r13 @ r12 -> stack args, taken before the push below
push {r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldmfd r12,{r7} @r7 = vir_width (5th argument, passed on the stack)
mov r8,r3 @ r8 = visible width (dest row advance, in pixels)
mul r2,r2,r7 @ r2 = total source pixels = h * vir_width
mov r3,#11 @ R weight
mov r4,#16 @ G weight
mov r5,#5 @ B weight
mov r9,#0x00 @ r9 = total pixels processed
mov r11,r0 @ r11 = dest row base
vdup.8 d4,r3 @ d4 = 8x11
vdup.8 d5,r4 @ d5 = 8x16
vdup.8 d6,r5 @ d6 = 8x5
mov r3,#00 @ r3 = pixels processed in current row
LOOP:
@ De-interleave 4 groups of 8 RGBX pixels: R in d0/d7/d11/d15,
@ G in d1/d8/d12/d16, B in d2/d9/d13/d17; the 4th (X/alpha) lane is unused.
vld4.8 {d0-d3},[r1]!
vld4.8 {d7-d10},[r1]!
vld4.8 {d11-d14},[r1]!
vld4.8 {d15-d18},[r1]!
@ --- pixels 0-7: q10 = 11R + 16G + 5B, >>9 -> eight 4-bit grays ---
vmull.u8 q10,d0,d4
vmlal.u8 q10,d1,d5
vmlal.u8 q10,d2,d6
vshr.u16 q11,q10,#9
@ Pack eight nibbles into one 32-bit word, pixel 0 in the lowest nibble
@ (values are already <= 15, so only the first lane needs masking).
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
@ --- pixels 8-15: same weighted sum and nibble pack ---
vmull.u8 q10,d7,d4
vmlal.u8 q10,d8,d5
vmlal.u8 q10,d9,d6
vshr.u16 q11,q10,#9
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
@ --- pixels 16-23 ---
vmull.u8 q10,d11,d4
vmlal.u8 q10,d12,d5
vmlal.u8 q10,d13,d6
vshr.u16 q11,q10,#9
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
@ --- pixels 24-31 ---
vmull.u8 q10,d15,d4
vmlal.u8 q10,d16,d5
vmlal.u8 q10,d17,d6
vshr.u16 q11,q10,#9
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
add r9,r9,#32 @ total pixels done
add r3,r3,#32 @ pixels done in this row
cmp r3,r7
bne ADD_TO_LOOP @ not yet at end of a source row
mov r3,#00
add r11,r11,r8,lsr #1 @ dest rows are w/2 bytes (2 pixels per byte);
mov r0,r11 @ any vir_width-w padding output is overwritten next row
ADD_TO_LOOP:
cmp r9,r2
blo LOOP
pop {r4,r5,r6,r7,r8,r9,r10,r11,pc}
.text
.global neon_rgb888_to_gray16ARM_16
@-----------------------------------------------------------------------
@ neon_rgb888_to_gray16ARM_16(u8 *dest, u8 *src, int h, int w, int vir_width)
@ Same conversion as neon_rgb888_to_gray16ARM_32 (RGBX8888 -> packed
@ 4bpp gray, gray4 = (11*R + 16*G + 5*B) >> 9, first pixel in the low
@ nibble) but processes 16 pixels per iteration, for widths that are
@ multiples of 16 rather than 32.
@ In:  r0 = dest, r1 = src, r2 = h, r3 = w, [sp] = vir_width
@ NOTE(review): clobbers d7-d10; AAPCS marks d8-d15 callee-saved --
@ confirm callers tolerate this.
@-----------------------------------------------------------------------
neon_rgb888_to_gray16ARM_16:
@ r0 = dest
@ r1 = src
@ r2 = h
@ r3 = w
mov r12,r13 @ r12 -> stack args, taken before the push below
push {r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldmfd r12,{r7} @r7 = vir_width (5th argument, on the stack)
mov r8,r3 @ r8 = visible width (dest row advance, in pixels)
mul r2,r2,r7 @ r2 = total source pixels
mov r3,#11 @ R weight
mov r4,#16 @ G weight
mov r5,#5 @ B weight
mov r9,#0x00 @ r9 = total pixels processed
mov r11,r0 @ r11 = dest row base
vdup.8 d4,r3
vdup.8 d5,r4
vdup.8 d6,r5
mov r3,#00 @ r3 = pixels processed in current row
LOOP_16:
@ De-interleave 2 groups of 8 RGBX pixels (R/G/B planes; X unused).
vld4.8 {d0-d3},[r1]!
vld4.8 {d7-d10},[r1]!
@ --- pixels 0-7: weighted sum >>9 -> eight nibbles, packed LSB-first ---
vmull.u8 q10,d0,d4
vmlal.u8 q10,d1,d5
vmlal.u8 q10,d2,d6
vshr.u16 q11,q10,#9
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
@ --- pixels 8-15 ---
vmull.u8 q10,d7,d4
vmlal.u8 q10,d8,d5
vmlal.u8 q10,d9,d6
vshr.u16 q11,q10,#9
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
add r9,r9,#16 @ total pixels done
add r3,r3,#16 @ pixels done in this row
cmp r3,r7
bne ADD_TO_LOOP_16
mov r3,#00
add r11,r11,r8,lsr #1 @ next dest row: w/2 bytes (2 pixels per byte)
mov r0,r11
ADD_TO_LOOP_16:
cmp r9,r2
blo LOOP_16
pop {r4,r5,r6,r7,r8,r9,r10,r11,pc}
.text
.global neon_bgr888_to_gray16ARM_32
@-----------------------------------------------------------------------
@ neon_bgr888_to_gray16ARM_32(u8 *dest, u8 *src, int h, int w, int vir_width)
@ BGRX8888 -> packed 4bpp gray, 32 pixels per loop. Identical to the
@ RGB variant except the weight registers are swapped so the formula
@ is still gray4 = (11*R + 16*G + 5*B) >> 9 with B in lane 0:
@   d0(B)*d6(5) + d1(G)*d5(16) + d2(R)*d4(11).
@ Two pixels per output byte, first pixel in the low nibble.
@ In:  r0 = dest, r1 = src, r2 = h, r3 = w, [sp] = vir_width
@ NOTE(review): clobbers d8-d18; AAPCS marks d8-d15 callee-saved --
@ confirm callers tolerate this.
@-----------------------------------------------------------------------
neon_bgr888_to_gray16ARM_32:
@ r0 = dest
@ r1 = src
@ r2 = h
@ r3 = w
mov r12,r13 @ r12 -> stack args, taken before the push below
push {r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldmfd r12,{r7} @r7 = vir_width (5th argument, on the stack)
mov r8,r3 @ r8 = visible width (dest row advance, in pixels)
mul r2,r2,r7 @ r2 = total source pixels
mov r3,#11 @ R weight
mov r4,#16 @ G weight
mov r5,#5 @ B weight
mov r9,#0x00 @ r9 = total pixels processed
mov r11,r0 @ r11 = dest row base
vdup.8 d4,r3
vdup.8 d5,r4
vdup.8 d6,r5
mov r3,#00 @ r3 = pixels processed in current row
LOOP_BGR:
@ De-interleave 4 groups of 8 BGRX pixels: B in d0/d7/d11/d15,
@ G in d1/d8/d12/d16, R in d2/d9/d13/d17; X lane unused.
vld4.8 {d0-d3},[r1]!
vld4.8 {d7-d10},[r1]!
vld4.8 {d11-d14},[r1]!
vld4.8 {d15-d18},[r1]!
@ --- pixels 0-7: 5B + 16G + 11R, >>9 -> nibbles, packed LSB-first ---
vmull.u8 q10,d0,d6
vmlal.u8 q10,d1,d5
vmlal.u8 q10,d2,d4
vshr.u16 q11,q10,#9
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
@ --- pixels 8-15 ---
vmull.u8 q10,d7,d6
vmlal.u8 q10,d8,d5
vmlal.u8 q10,d9,d4
vshr.u16 q11,q10,#9
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
@ --- pixels 16-23 ---
vmull.u8 q10,d11,d6
vmlal.u8 q10,d12,d5
vmlal.u8 q10,d13,d4
vshr.u16 q11,q10,#9
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
@ --- pixels 24-31 ---
vmull.u8 q10,d15,d6
vmlal.u8 q10,d16,d5
vmlal.u8 q10,d17,d4
vshr.u16 q11,q10,#9
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
add r9,r9,#32 @ total pixels done
add r3,r3,#32 @ pixels done in this row
cmp r3,r7
bne ADD_TO_LOOP_BGR
mov r3,#00
add r11,r11,r8,lsr #1 @ next dest row: w/2 bytes (2 pixels per byte)
mov r0,r11
ADD_TO_LOOP_BGR:
cmp r9,r2
blo LOOP_BGR
pop {r4,r5,r6,r7,r8,r9,r10,r11,pc}
.text
.global neon_bgr888_to_gray16ARM_16
@-----------------------------------------------------------------------
@ neon_bgr888_to_gray16ARM_16(u8 *dest, u8 *src, int h, int w, int vir_width)
@ BGRX8888 -> packed 4bpp gray, 16 pixels per iteration. Same formula
@ as the other gray16 routines, with the weight registers swapped for
@ the BGR byte order:
@   gray4 = (d0(B)*5 + d1(G)*16 + d2(R)*11) >> 9
@ Two pixels per output byte, first pixel in the low nibble.
@ In:  r0 = dest, r1 = src, r2 = h, r3 = w, [sp] = vir_width
@ NOTE(review): clobbers d7-d10; AAPCS marks d8-d15 callee-saved --
@ confirm callers tolerate this.
@-----------------------------------------------------------------------
neon_bgr888_to_gray16ARM_16:
@ r0 = dest
@ r1 = src
@ r2 = h
@ r3 = w
mov r12,r13 @ r12 -> stack args, taken before the push below
push {r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldmfd r12,{r7} @r7 = vir_width (5th argument, on the stack)
mov r8,r3 @ r8 = visible width (dest row advance, in pixels)
mul r2,r2,r7 @ r2 = total source pixels
mov r3,#11 @ R weight
mov r4,#16 @ G weight
mov r5,#5 @ B weight
mov r9,#0x00 @ r9 = total pixels processed
mov r11,r0 @ r11 = dest row base
vdup.8 d4,r3
vdup.8 d5,r4
vdup.8 d6,r5
mov r3,#00 @ r3 = pixels processed in current row
LOOP_16_BGR:
@ De-interleave 2 groups of 8 BGRX pixels (B/G/R planes; X unused).
vld4.8 {d0-d3},[r1]!
vld4.8 {d7-d10},[r1]!
@ --- pixels 0-7: weighted sum >>9 -> nibbles, packed LSB-first ---
vmull.u8 q10,d0,d6
vmlal.u8 q10,d1,d5
vmlal.u8 q10,d2,d4
vshr.u16 q11,q10,#9
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
@ --- pixels 8-15 ---
vmull.u8 q10,d7,d6
vmlal.u8 q10,d8,d5
vmlal.u8 q10,d9,d4
vshr.u16 q11,q10,#9
vmov.u16 r5,d22[0]
and r6,r5,#0x0000000f
vmov.u16 r5,d22[1]
orr r6,r6,r5,lsl #4
vmov.u16 r5,d22[2]
orr r6,r6,r5,lsl #8
vmov.u16 r5,d22[3]
orr r6,r6,r5,lsl #12
vmov.u16 r5,d23[0]
orr r6,r6,r5,lsl #16
vmov.u16 r5,d23[1]
orr r6,r6,r5,lsl #20
vmov.u16 r5,d23[2]
orr r6,r6,r5,lsl #24
vmov.u16 r5,d23[3]
orr r6,r6,r5,lsl #28
str r6,[r0],#4
add r9,r9,#16 @ total pixels done
add r3,r3,#16 @ pixels done in this row
cmp r3,r7
bne ADD_TO_LOOP_16_BGR
mov r3,#00
add r11,r11,r8,lsr #1 @ next dest row: w/2 bytes (2 pixels per byte)
mov r0,r11
ADD_TO_LOOP_16_BGR:
cmp r9,r2
blo LOOP_16_BGR
pop {r4,r5,r6,r7,r8,r9,r10,r11,pc}
.text
.global neon_rgb888_to_gray256ARM_16
@-----------------------------------------------------------------------
@ neon_rgb888_to_gray256ARM_16(u8 *dest, u8 *src, int h, int w, int vir_width)
@ RGBX8888 -> 8-bit grayscale ("gray256"), 16 pixels per iteration:
@   gray = (77*R + 151*G + 28*B) >> 8
@ (weights sum to 256; approximately BT.601 luma 0.30/0.59/0.11).
@ One output byte per pixel; dest rows advance by w bytes. Assumes
@ w and vir_width are multiples of 16.
@ In:  r0 = dest, r1 = src, r2 = h, r3 = w, [sp] = vir_width
@ NOTE(review): clobbers d7-d10; AAPCS marks d8-d15 callee-saved --
@ confirm callers tolerate this.
@-----------------------------------------------------------------------
neon_rgb888_to_gray256ARM_16:
@ r0 = dest
@ r1 = src
@ r2 = h
@ r3 = w
mov r12,r13 @ r12 -> stack args, taken before the push below
push {r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldmfd r12,{r7} @r7 = vir_width (5th argument, on the stack)
mov r8,r3 @ r8 = dest row stride in bytes (1 byte per pixel)
mul r2,r2,r7 @ r2 = total source pixels
@mul r2,r2,r3
@lsr r2,r2,#5
mov r3,#77 @ R weight
mov r4,#151 @ G weight
mov r5,#28 @ B weight
vdup.8 d4,r3
vdup.8 d5,r4
vdup.8 d6,r5
mov r11,r0 @ r11 = dest row base
mov r3,#00 @ r3 = pixels processed in current row
mov r9,#0x00 @ r9 = total pixels processed
LOOP256:
vld4.8 {d0-d3},[r1]! @ d0=R d1=G d2=B d3=X for 8 pixels
vld4.8 {d7-d10},[r1]!
vmull.u8 q10,d0,d4 @ q10 = 77*R
vmlal.u8 q10,d1,d5 @     + 151*G
vmlal.u8 q10,d2,d6 @     + 28*B
vshrn.u16 d19,q10,#8 @ narrow to 8-bit gray
vst1.8 {d19},[r0]!
vmull.u8 q10,d7,d4 @ second group of 8 pixels
vmlal.u8 q10,d8,d5
vmlal.u8 q10,d9,d6
vshrn.u16 d19,q10,#8
vst1.8 {d19},[r0]!
add r9,r9,#16 @ total pixels done
add r3,r3,#16 @ pixels done in this row
cmp r3,r7
bne ADD_TO_256LOOP
mov r3,#00
add r11,r11,r8 @,lsr #1 -- next dest row is w bytes (1 byte/pixel)
mov r0,r11
ADD_TO_256LOOP:
cmp r9,r2
blo LOOP256
pop {r4,r5,r6,r7,r8,r9,r10,r11,pc}
.text
.global neon_rgb888_to_gray256ARM_32
@-----------------------------------------------------------------------
@ neon_rgb888_to_gray256ARM_32(u8 *dest, u8 *src, int h, int w, int vir_width)
@ RGBX8888 -> 8-bit grayscale, 32 pixels per iteration:
@   gray = (77*R + 151*G + 28*B) >> 8   (~BT.601 luma, weights sum 256)
@ One output byte per pixel; dest rows advance by w bytes. Assumes
@ w and vir_width are multiples of 32.
@ In:  r0 = dest, r1 = src, r2 = h, r3 = w, [sp] = vir_width
@ NOTE(review): clobbers d8-d18; AAPCS marks d8-d15 callee-saved --
@ confirm callers tolerate this.
@-----------------------------------------------------------------------
neon_rgb888_to_gray256ARM_32:
@ r0 = dest
@ r1 = src
@ r2 = h
@ r3 = w
mov r12,r13 @ r12 -> stack args, taken before the push below
push {r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldmfd r12,{r7} @r7 = vir_width (5th argument, on the stack)
mov r8,r3 @ r8 = dest row stride in bytes (1 byte per pixel)
mul r2,r2,r7 @ r2 = total source pixels
@mul r2,r2,r3
@lsr r2,r2,#5
mov r3,#77 @ R weight
mov r4,#151 @ G weight
mov r5,#28 @ B weight
vdup.8 d4,r3
vdup.8 d5,r4
vdup.8 d6,r5
mov r11,r0 @ r11 = dest row base
mov r3,#00 @ r3 = pixels processed in current row
mov r9,#0x00 @ r9 = total pixels processed
LOOP256_32:
@ De-interleave 4 groups of 8 RGBX pixels (R/G/B planes; X unused).
vld4.8 {d0-d3},[r1]!
vld4.8 {d7-d10},[r1]!
vld4.8 {d11-d14},[r1]!
vld4.8 {d15-d18},[r1]!
vmull.u8 q10,d0,d4 @ pixels 0-7: 77R + 151G + 28B
vmlal.u8 q10,d1,d5
vmlal.u8 q10,d2,d6
vshrn.u16 d19,q10,#8 @ >>8, narrow to 8-bit gray
vst1.8 {d19},[r0]!
vmull.u8 q10,d7,d4 @ pixels 8-15
vmlal.u8 q10,d8,d5
vmlal.u8 q10,d9,d6
vshrn.u16 d19,q10,#8
vst1.8 {d19},[r0]!
vmull.u8 q10,d11,d4 @ pixels 16-23
vmlal.u8 q10,d12,d5
vmlal.u8 q10,d13,d6
vshrn.u16 d19,q10,#8
vst1.8 {d19},[r0]!
vmull.u8 q10,d15,d4 @ pixels 24-31
vmlal.u8 q10,d16,d5
vmlal.u8 q10,d17,d6
vshrn.u16 d19,q10,#8
vst1.8 {d19},[r0]!
add r9,r9,#32 @ total pixels done
add r3,r3,#32 @ pixels done in this row
cmp r3,r7
bne ADD_TO_256LOOP_32
mov r3,#00
add r11,r11,r8 @,lsr #1 -- next dest row is w bytes (1 byte/pixel)
mov r0,r11
ADD_TO_256LOOP_32:
cmp r9,r2
blo LOOP256_32
pop {r4,r5,r6,r7,r8,r9,r10,r11,pc}
.text
.global neon_gray16_to_gray2ARM
@-----------------------------------------------------------------------
@ neon_gray16_to_gray2ARM(u8 *dest, int w, int h)
@ In-place threshold of packed 4bpp gray to black/white (still stored
@ as 4bpp): every nibble that is not 0xF (full white) is forced to 0x0
@ (black). Processes one 32-bit word = 8 pixels per iteration, so w*h
@ must be a multiple of 8. Plain ARM code despite the "neon_" prefix.
@ In:  r0 = dest (read-modify-write), r1 = w, r2 = h
@-----------------------------------------------------------------------
neon_gray16_to_gray2ARM:
@ r0 = dest
@ r1 = w
@ r2 = h
push {r4,r5,r6,lr}
mul r2,r1,r2
lsr r2,r2,#3 @ r2 = word count (8 nibbles per 32-bit word)
LOOP2gray:
ldr r4,[r0] @ r4 = 8 packed pixels
mov r6,r4 @ r6 = result; clear each non-white nibble below
and r5,r4,#0x0f @ nibble 0
cmp r5,#0x0f
andne r6,r6,#0xfffffff0 @ not 0xF -> clear to 0
and r5,r4,#0xf0 @ nibble 1
cmp r5,#0xf0
andne r6,r6,#0xffffff0f
and r5,r4,#0x0f00 @ nibble 2
cmp r5,#0x0f00
andne r6,r6,#0xfffff0ff
and r5,r4,#0xf000 @ nibble 3
cmp r5,#0xf000
andne r6,r6,#0xffff0fff
and r5,r4,#0xf0000 @ nibble 4
cmp r5,#0xf0000
andne r6,r6,#0xfff0ffff
and r5,r4,#0xf00000 @ nibble 5
cmp r5,#0xf00000
andne r6,r6,#0xff0fffff
and r5,r4,#0xf000000 @ nibble 6
cmp r5,#0xf000000
andne r6,r6,#0xf0ffffff
and r5,r4,#0xf0000000 @ nibble 7
cmp r5,#0xf0000000
andne r6,r6,#0x0fffffff
str r6,[r0],#4 @ write back and advance
subs r2,r2,#1
bne LOOP2gray
pop {r4,r5,r6,pc}
.text
.global neon_rgb256_to_gray16DITHER
@-----------------------------------------------------------------------
@ neon_rgb256_to_gray16DITHER(u8 *src, u8 *dst, u16 *res0, u16 *res1, int w)
@ One row of Floyd-Steinberg-style error diffusion: quantizes 8-bit
@ gray to 4-bit, 4 pixels per iteration, packing the 4 nibbles into
@ one halfword store to dst (pixel 1 in the lowest nibble).
@ res0 holds error diffused into the current row (each halfword is
@ consumed, then zeroed via the str/#4 post-increments); res1
@ accumulates error for the row below with the classic FS weights:
@ 7/16 to the right (carried in r5), 3/16 below-left, 5/16 below,
@ 1/16 below-right. The below-left write is skipped for the row's
@ first pixel and the below-right write for its last.
@ Plain ARM code despite the "neon_" prefix.
@ In:  r0 = src, r1 = dst, r2 = res0, r3 = res1, [sp] = w
@ NOTE(review): r3 (res1) advances 4 bytes per 4 pixels while r2
@ (res0) advances 8 -- the res1 layout looks half-rate; confirm the
@ intended buffer format with the caller.
@-----------------------------------------------------------------------
neon_rgb256_to_gray16DITHER:
@ r0 = src
@ r1 = dst
@ r2 = res0
@ r3 = res1
mov r12,r13 @ r12 -> stack args, taken before the push below
@push {r4,r5,r6,r7,r8,r9,r10,r11,lr}
STMFD r13!,{r4-r11,r14}
ldmfd r12,{r4} @r4 = w (5th argument, on the stack)
mov r5,#0x00 @ r5 = error carried right (7/16 of previous e)
mov r14,#0x00 @ r14 = pixel index within the row
LOOP_DITHER:
ldr r9,[r0],#4 @ r9 = next 4 source pixels
mov r12,#0x00 @ r12 = packed 4x4-bit output halfword
@ ---- pixel 1 (byte 0 of r9) ----
and r10,r9,#0xff
ldrh r11,[r2]
add r10,r10,r11 @ + error from the row above
add r10,r10,r5 @ + error from the pixel to the left
cmp r10,#0xff
movhs r10,#0xff @ clamp to 255 (also catches unsigned overflow)
and r11,r10,#0xf0 @ quantized level, high-nibble aligned
mov r12,r11,lsr #4 @ -> output bits 0-3
sub r10,r10,r11 @ e
mov r11,#0x07
mul r5,r10,r11
mov r5,r5,lsr #4 @ r5 = 7e/16, carried to pixel 2
ldrh r11,[r3]
mov r6,#0x05
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3] @ below += 5e/16
ldrh r11,[r3,#2]
mov r6,#0x01
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3,#2] @ below-right += e/16
cmp r14,#0x00
beq PIX2 @ first pixel of row: no below-left neighbour
ldrh r11,[r3,#-2]
mov r6,#0x03
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3,#-2] @ below-left += 3e/16
PIX2:
@ ---- pixel 2 (byte 1 of r9) ----
and r10,r9,#0x0000ff00
mov r10,r10,lsr #8
ldrh r11,[r2,#2]
add r10,r10,r11
add r10,r10,r5
mov r11,#00
str r11,[r2],#4 @ zero the 2 consumed res0 halfwords, advance
cmp r10,#0xff
movhs r10,#0xff
and r11,r10,#0xf0
orr r12,r12,r11 @ -> output bits 4-7
sub r10,r10,r11 @ e
mov r11,#0x07
mul r5,r10,r11
mov r5,r5,lsr #4 @ 7e/16 -> pixel 3
ldrh r11,[r3,#2]
mov r6,#0x05
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3,#2] @ below += 5e/16
ldrh r11,[r3,#4]
mov r6,#0x01
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3,#4] @ below-right += e/16
ldrh r11,[r3]
mov r6,#0x03
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3] @ below-left += 3e/16
PIX3:
@ ---- pixel 3 (byte 2 of r9) ----
and r10,r9,#0x00ff0000
mov r10,r10,lsr #16
ldrh r11,[r2]
add r10,r10,r11
add r10,r10,r5
cmp r10,#0xff
movhs r10,#0xff
and r11,r10,#0xf0
orr r12,r12,r11,lsl #4 @ -> output bits 8-11
sub r10,r10,r11 @ e
mov r11,#0x07
mul r5,r10,r11
mov r5,r5,lsr #4 @ 7e/16 -> pixel 4
ldrh r11,[r3,#4]
mov r6,#0x05
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3,#4] @ below += 5e/16
ldrh r11,[r3,#6]
mov r6,#0x01
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3,#6] @ below-right += e/16
ldrh r11,[r3,#2]
mov r6,#0x03
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3,#2] @ below-left += 3e/16
PIX4:
@ ---- pixel 4 (byte 3 of r9) ----
and r10,r9,#0xff000000
mov r10,r10,lsr #24
ldrh r11,[r2,#2]
add r10,r10,r11
add r10,r10,r5
mov r11,#00
str r11,[r2],#4 @ zero the 2 consumed res0 halfwords, advance
cmp r10,#0xff
movhs r10,#0xff
and r11,r10,#0xf0
orr r12,r12,r11,lsl #8 @ -> output bits 12-15
sub r10,r10,r11 @ e
mov r11,#0x07
mul r5,r10,r11
mov r5,r5,lsr #4 @ 7e/16 -> first pixel of next group
ldrh r11,[r3,#4]
mov r6,#0x03
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3,#4] @ below-left += 3e/16
ldrh r11,[r3,#6]
mov r6,#0x05
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3,#6] @ below += 5e/16
sub r11,r4,#4
cmp r14,r11
beq PIXEND @ last 4 pixels of the row: no below-right neighbour
ldrh r11,[r3,#8]
mov r6,#0x01
mul r6,r10,r6
mov r6,r6,lsr #4
add r11,r11,r6
strh r11,[r3,#8] @ below-right += e/16
PIXEND:
strh r12,[r1],#2 @ store 4 packed nibbles
add r14,r14,#0x04 @ 4 pixels consumed
add r3,r3,#4 @ advance res1 (see NOTE above)
cmp r14,r4
blo LOOP_DITHER
@pop {r4,r5,r6,r7,r8,r9,r10,r11,pc}
LDMFD r13!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.text
.global neon_gray256_to_gray16ARM_16
@-----------------------------------------------------------------------
@ neon_gray256_to_gray16ARM_16(u8 *dest, u8 *src, int h, int w, int vir_width)
@ 8-bit gray -> packed 4bpp gray, 16 pixels per iteration.
@ vld2 splits pixels into even (d0) and odd (d1) lanes; each output
@ byte is (odd & 0xF0) | (even >> 4), i.e. the first pixel of each
@ pair lands in the low nibble. Dest rows advance by w/2 bytes.
@ Assumes w and vir_width are multiples of 16.
@ In:  r0 = dest, r1 = src, r2 = h, r3 = w, [sp] = vir_width
@ FIX: removed the dead "mov r6,#0" that ran on every loop iteration
@ (r6 is never read in this function).
@-----------------------------------------------------------------------
neon_gray256_to_gray16ARM_16:
@ r0 = dest
@ r1 = src
@ r2 = h
@ r3 = w
mov r12,r13 @ r12 -> stack args, taken before the push below
STMFD r13!,{r4-r11,r14}
ldmfd r12,{r7} @r7 = vir_width (5th argument, on the stack)
mov r8,r3 @ r8 = visible width in pixels
mul r2,r2,r7 @ r2 = total source pixels
mov r9,#0x00 @ r9 = total pixels processed
mov r11,r0 @ r11 = dest row base
mov r3,#00 @ r3 = pixels processed in current row
mov r10,#0xf0
vdup.8 d2,r10 @ d2 = high-nibble mask
LOOP_256_16:
vld2.8 {d0,d1},[r1]! @ d0 = even pixels, d1 = odd pixels
vand d0,d0,d2 @ keep top 4 bits of each pixel
vand d1,d1,d2
vshr.u8 d0,d0,#4 @ even pixel -> low nibble
vorr d1,d1,d0 @ combine: odd in high nibble, even in low
vst1.8 {d1},[r0]!
add r9,r9,#16 @ total pixels done
add r3,r3,#16 @ pixels done in this row
cmp r3,r7
bne ADD_TO_LOOP_256_16
mov r3,#00
add r11,r11,r8,lsr #1 @ next dest row: w/2 bytes (2 pixels per byte)
mov r0,r11
ADD_TO_LOOP_256_16:
cmp r9,r2
blo LOOP_256_16
LDMFD r13!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.text
.global neon_gray256_to_gray16ARM_32
@-----------------------------------------------------------------------
@ neon_gray256_to_gray16ARM_32(u8 *dest, u8 *src, int h, int w, int vir_width)
@ 8-bit gray -> packed 4bpp gray, 32 pixels per iteration.
@ vld2 splits pixels into even/odd lanes; each output byte is
@ (odd & 0xF0) | (even >> 4), first pixel of each pair in the low
@ nibble. Dest rows advance by w/2 bytes. Assumes w and vir_width
@ are multiples of 32.
@ In:  r0 = dest, r1 = src, r2 = h, r3 = w, [sp] = vir_width
@ FIX: removed the dead "mov r6,#0" that ran on every loop iteration
@ (r6 is never read in this function).
@-----------------------------------------------------------------------
neon_gray256_to_gray16ARM_32:
@ r0 = dest
@ r1 = src
@ r2 = h
@ r3 = w
mov r12,r13 @ r12 -> stack args, taken before the push below
STMFD r13!,{r4-r11,r14}
ldmfd r12,{r7} @r7 = vir_width (5th argument, on the stack)
mov r8,r3 @ r8 = visible width in pixels
mul r2,r2,r7 @ r2 = total source pixels
mov r9,#0x00 @ r9 = total pixels processed
mov r11,r0 @ r11 = dest row base
mov r3,#00 @ r3 = pixels processed in current row
mov r10,#0xf0
vdup.8 d2,r10 @ d2 = high-nibble mask
LOOP_256_32:
vld2.8 {d0,d1},[r1]! @ pixels 0-15: d0 = even, d1 = odd
vld2.8 {d3,d4},[r1]! @ pixels 16-31: d3 = even, d4 = odd
vand d0,d0,d2 @ keep top 4 bits of each pixel
vand d1,d1,d2
vand d3,d3,d2
vand d4,d4,d2
vshr.u8 d0,d0,#4 @ even pixel -> low nibble
vorr d1,d1,d0
vst1.8 {d1},[r0]!
vshr.u8 d3,d3,#4
vorr d4,d4,d3
vst1.8 {d4},[r0]!
add r9,r9,#32 @ total pixels done
add r3,r3,#32 @ pixels done in this row
cmp r3,r7
bne ADD_TO_LOOP_256_32
mov r3,#00
add r11,r11,r8,lsr #1 @ next dest row: w/2 bytes (2 pixels per byte)
mov r0,r11
ADD_TO_LOOP_256_32:
cmp r9,r2
blo LOOP_256_32
LDMFD r13!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.text
.global neon_gray256_to_gray256
@-----------------------------------------------------------------------
@ neon_gray256_to_gray256(u8 *dest, u8 *src, int h, int w)
@ Straight copy of h*w bytes of 8-bit gray, 32 bytes per iteration
@ (h*w must be a multiple of 32).
@ In:  r0 = dest, r1 = src, r2 = h, r3 = w
@ FIX: r9 is the loop byte-counter but was never initialized before
@ "add r9,r9,#32 / cmp r9,r2", so the number of iterations depended
@ on whatever value the caller happened to leave in r9. Zero it first.
@-----------------------------------------------------------------------
neon_gray256_to_gray256:
@ r0 = dest
@ r1 = src
@ r2 = h
@ r3 = w
STMFD r13!,{r4-r11,r14}
mul r2,r2,r3 @ r2 = total byte count
mov r9,#0x00 @ r9 = bytes copied so far (was uninitialized)
LOOP_256_TO_256:
vld4.8 {d0-d3},[r1]! @ copy 32 bytes per iteration
vst4.8 {d0-d3},[r0]!
add r9,r9,#32
cmp r9,r2
blo LOOP_256_TO_256
LDMFD r13!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}