; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx < %s | FileCheck %s --check-prefix=AVX  --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx2 < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f < %s | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512 --check-prefix=SKX

; To test for the case where masked load/store is not legal, we should add a run with a target
; that does not have AVX, but that case should probably be a separate test file using fewer tests
; because it takes over 1.2 seconds to codegen these tests on Haswell 4GHz if there's no maskmov.

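; Operand order reminder for the intrinsics exercised below:
; llvm.masked.load(pointer, alignment, mask, pass-through) and
; llvm.masked.store(value, pointer, alignment, mask).
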
define <16 x i32> @test1(<16 x i32> %trigger, <16 x i32>* %addr) {
; AVX1-LABEL: test1:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps 32(%rdi), %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test1:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpmaskmovd 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test1:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; AVX512-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  %res = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>undef)
  ret <16 x i32> %res
}

define <16 x i32> @test2(<16 x i32> %trigger, <16 x i32>* %addr) {
; AVX1-LABEL: test2:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps 32(%rdi), %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test2:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpmaskmovd 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test2:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; AVX512-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  %res = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>zeroinitializer)
  ret <16 x i32> %res
}

define void @test3(<16 x i32> %trigger, <16 x i32>* %addr, <16 x i32> %val) {
; AVX1-LABEL: test3:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps %ymm3, %ymm1, 32(%rdi)
; AVX1-NEXT:    vmaskmovps %ymm2, %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test3:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm1, %ymm1
; AVX2-NEXT:    vpmaskmovd %ymm3, %ymm1, 32(%rdi)
; AVX2-NEXT:    vpmaskmovd %ymm2, %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test3:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
; AVX512-NEXT:    vmovdqu32 %zmm1, (%rdi) {%k1}
; AVX512-NEXT:    retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>%val, <16 x i32>* %addr, i32 4, <16 x i1>%mask)
  ret void
}

define <16 x float> @test4(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %dst) {
; AVX1-LABEL: test4:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm4
; AVX1-NEXT:    vblendvps %ymm0, %ymm4, %ymm2, %ymm0
; AVX1-NEXT:    vmaskmovps 32(%rdi), %ymm1, %ymm2
; AVX1-NEXT:    vblendvps %ymm1, %ymm2, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test4:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm4
; AVX2-NEXT:    vblendvps %ymm0, %ymm4, %ymm2, %ymm0
; AVX2-NEXT:    vmaskmovps 32(%rdi), %ymm1, %ymm2
; AVX2-NEXT:    vblendvps %ymm1, %ymm2, %ymm3, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test4:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
; AVX512-NEXT:    vmovups (%rdi), %zmm1 {%k1}
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  %res = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %addr, i32 4, <16 x i1>%mask, <16 x float> %dst)
  ret <16 x float> %res
}

define <8 x double> @test5(<8 x i32> %trigger, <8 x double>* %addr, <8 x double> %dst) {
; AVX1-LABEL: test5:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm5
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm4
; AVX1-NEXT:    vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
; AVX1-NEXT:    vmaskmovpd 32(%rdi), %ymm3, %ymm1
; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test5:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpcmpeqd %xmm4, %xmm3, %xmm3
; AVX2-NEXT:    vpmovsxdq %xmm3, %ymm3
; AVX2-NEXT:    vpcmpeqd %xmm4, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm4
; AVX2-NEXT:    vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
; AVX2-NEXT:    vmaskmovpd 32(%rdi), %ymm3, %ymm1
; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test5:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
; AVX512F-NEXT:    vmovupd (%rdi), %zmm1 {%k1}
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT:    vmovupd (%rdi), %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
  %res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1>%mask, <8 x double>%dst)
  ret <8 x double> %res
}

define <2 x double> @test6(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
; AVX-LABEL: test6:
; AVX:       ## BB#0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2
; AVX-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: test6:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2
; AVX512F-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test6:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT:    vmovupd (%rdi), %xmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i64> %trigger, zeroinitializer
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
  ret <2 x double> %res
}

define <4 x float> @test7(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %dst) {
; AVX-LABEL: test7:
; AVX:       ## BB#0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: test7:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test7:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT:    vmovups (%rdi), %xmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1>%mask, <4 x float>%dst)
  ret <4 x float> %res
}

define <4 x i32> @test8(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
; AVX1-LABEL: test8:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test8:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test8:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT:    vmovdqu32 (%rdi), %xmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
  ret <4 x i32> %res
}

define void @test9(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
; AVX1-LABEL: test9:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test9:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test9:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test9:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT:    vmovdqu32 %xmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
  ret void
}

define <4 x double> @test10(<4 x i32> %trigger, <4 x double>* %addr, <4 x double> %dst) {
; AVX1-LABEL: test10:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2
; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test10:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2
; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test10:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX512F-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2
; AVX512F-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test10:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT:    vmovapd (%rdi), %ymm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>%dst)
  ret <4 x double> %res
}

define <4 x double> @test10b(<4 x i32> %trigger, <4 x double>* %addr, <4 x double> %dst) {
; AVX1-LABEL: test10b:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test10b:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test10b:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX512F-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test10b:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
; SKX-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>zeroinitializer)
  ret <4 x double> %res
}

define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
; AVX1-LABEL: test11a:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2
; AVX1-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test11a:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2
; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test11a:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    vmovups (%rdi), %zmm1 {%k1}
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test11a:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT:    vmovaps (%rdi), %ymm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 32, <8 x i1>%mask, <8 x float>%dst)
  ret <8 x float> %res
}

define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) {
; AVX1-LABEL: test11b:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2
; AVX1-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test11b:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm2
; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test11b:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    vmovdqu32 (%rdi), %zmm1 {%k1}
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test11b:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vmovdqu32 (%rdi), %ymm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst)
  ret <8 x i32> %res
}

define <8 x float> @test11c(<8 x i1> %mask, <8 x float>* %addr) {
; AVX1-LABEL: test11c:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test11c:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test11c:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test11c:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 32, <8 x i1> %mask, <8 x float> zeroinitializer)
  ret <8 x float> %res
}

define <8 x i32> @test11d(<8 x i1> %mask, <8 x i32>* %addr) {
; AVX1-LABEL: test11d:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test11d:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test11d:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test11d:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1> %mask, <8 x i32> zeroinitializer)
  ret <8 x i32> %res
}

define void @test12(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %val) {
; AVX1-LABEL: test12:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test12:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpmaskmovd %ymm1, %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test12:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    vmovdqu32 %zmm1, (%rdi) {%k1}
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test12:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT:    vmovdqu32 %ymm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>%val, <8 x i32>* %addr, i32 4, <8 x i1>%mask)
  ret void
}

define void @test13(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %val) {
; AVX1-LABEL: test13:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps %ymm3, %ymm1, 32(%rdi)
; AVX1-NEXT:    vmaskmovps %ymm2, %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test13:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm1, %ymm1
; AVX2-NEXT:    vmaskmovps %ymm3, %ymm1, 32(%rdi)
; AVX2-NEXT:    vmaskmovps %ymm2, %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test13:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
; AVX512-NEXT:    vmovups %zmm1, (%rdi) {%k1}
; AVX512-NEXT:    retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v16f32.p0v16f32(<16 x float>%val, <16 x float>* %addr, i32 4, <16 x i1>%mask)
  ret void
}

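; The <2 x i32>/<2 x float> tests below also exercise type legalization of the
; narrow trigger: <2 x i32> is widened to a 64-bit-element vector, so the upper
; halves are first zeroed with a blend before the 64-bit compare (vpcmpeqq).
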
define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
; AVX1-LABEL: test14:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test14:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test14:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test14:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT:    vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT:    kshiftlw $14, %k0, %k0
; SKX-NEXT:    kshiftrw $14, %k0, %k1
; SKX-NEXT:    vmovups %xmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
  ret void
}

define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
; AVX1-LABEL: test15:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test15:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX2-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test15:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512F-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test15:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT:    vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT:    vpmovqd %xmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
  ret void
}

define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
; AVX1-LABEL: test16:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test16:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test16:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT:    vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT:    kshiftlw $14, %k0, %k0
; SKX-NEXT:    kshiftrw $14, %k0, %k1
; SKX-NEXT:    vmovups (%rdi), %xmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
  ret <2 x float> %res
}

define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
; AVX1-LABEL: test17:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test17:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test17:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test17:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT:    vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT:    kshiftlw $14, %k0, %k0
; SKX-NEXT:    kshiftrw $14, %k0, %k1
; SKX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SKX-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1}
; SKX-NEXT:    vpmovsxdq %xmm0, %xmm0
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
  ret <2 x i32> %res
}

define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
; AVX1-LABEL: test18:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test18:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test18:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512F-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test18:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
; SKX-NEXT:    kshiftlw $14, %k0, %k0
; SKX-NEXT:    kshiftrw $14, %k0, %k1
; SKX-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>undef)
  ret <2 x float> %res
}

define <4 x float> @load_all(<4 x i32> %trigger, <4 x float>* %addr) {
; AVX-LABEL: load_all:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: load_all:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: load_all:
; SKX:       ## BB#0:
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>, <4 x float>undef)
  ret <4 x float> %res
}

;;; Loads with Constant Masks - these should be optimized to use something other than a variable blend.

; 128-bit FP vectors are supported with AVX.

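; On SKX, the constant mask is materialized directly in a mask register
; (mov immediate into a GPR, then kmovw into %k1), so the masked move
; itself merges into %dst and no variable blend is emitted.
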
define <4 x float> @mload_constmask_v4f32(<4 x float>* %addr, <4 x float> %dst) {
; AVX-LABEL: mload_constmask_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = mem[0],xmm0[1],mem[2,3]
; AVX-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4f32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovaps {{.*#+}} xmm1 = [4294967295,0,4294967295,4294967295]
; AVX512F-NEXT:    vmaskmovps (%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4f32:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $13, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovups (%rdi), %xmm0 {%k1}
; SKX-NEXT:    retq
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x float> %dst)
  ret <4 x float> %res
}

; 128-bit integer vectors are supported with AVX2.

define <4 x i32> @mload_constmask_v4i32(<4 x i32>* %addr, <4 x i32> %dst) {
; AVX1-LABEL: mload_constmask_v4i32:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295]
; AVX1-NEXT:    vmaskmovps (%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mload_constmask_v4i32:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295]
; AVX2-NEXT:    vpmaskmovd (%rdi), %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4i32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295]
; AVX512F-NEXT:    vpmaskmovd (%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $14, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1}
; SKX-NEXT:    retq
  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 1>, <4 x i32> %dst)
  ret <4 x i32> %res
}

; 256-bit FP vectors are supported with AVX.

define <8 x float> @mload_constmask_v8f32(<8 x float>* %addr, <8 x float> %dst) {
; AVX-LABEL: mload_constmask_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,0,0,0,0,0]
; AVX-NEXT:    vmaskmovps (%rdi), %ymm1, %ymm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; AVX-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v8f32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    movw $7, %ax
; AVX512F-NEXT:    kmovw %eax, %k1
; AVX512F-NEXT:    vmovups (%rdi), %zmm0 {%k1}
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v8f32:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $7, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovups (%rdi), %ymm0 {%k1}
; SKX-NEXT:    retq
  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x float> %dst)
  ret <8 x float> %res
}

define <4 x double> @mload_constmask_v4f64(<4 x double>* %addr, <4 x double> %dst) {
; AVX-LABEL: mload_constmask_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovapd {{.*#+}} ymm1 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX-NEXT:    vmaskmovpd (%rdi), %ymm1, %ymm1
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4f64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovapd {{.*#+}} ymm1 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX512F-NEXT:    vmaskmovpd (%rdi), %ymm1, %ymm2
; AVX512F-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4f64:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $7, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovupd (%rdi), %ymm0 {%k1}
; SKX-NEXT:    retq
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> %dst)
  ret <4 x double> %res
}

; 256-bit integer vectors are supported with AVX2.

define <8 x i32> @mload_constmask_v8i32(<8 x i32>* %addr, <8 x i32> %dst) {
; AVX1-LABEL: mload_constmask_v8i32:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2],ymm0[3,4,5,6],mem[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mload_constmask_v8i32:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2],ymm0[3,4,5,6],mem[7]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v8i32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    movw $135, %ax
; AVX512F-NEXT:    kmovw %eax, %k1
; AVX512F-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1}
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $-121, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1}
; SKX-NEXT:    retq
  %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x i32> %dst)
  ret <8 x i32> %res
}

define <4 x i64> @mload_constmask_v4i64(<4 x i64>* %addr, <4 x i64> %dst) {
; AVX1-LABEL: mload_constmask_v4i64:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = mem[0],ymm0[1,2],mem[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mload_constmask_v4i64:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1],ymm0[2,3,4,5],mem[6,7]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4i64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [18446744073709551615,0,0,18446744073709551615]
; AVX512F-NEXT:    vpmaskmovq (%rdi), %ymm1, %ymm2
; AVX512F-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $9, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1}
; SKX-NEXT:    retq
  %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i64> %dst)
  ret <4 x i64> %res
}

; 512-bit FP vectors are supported with AVX512.

define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %dst) {
; AVX-LABEL: mload_constmask_v8f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} ymm1 = ymm1[0,1,2],mem[3]
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = mem[0,1,2],ymm0[3]
; AVX-NEXT:    retq
;
; AVX512-LABEL: mload_constmask_v8f64:
; AVX512:       ## BB#0:
; AVX512-NEXT:    movb $-121, %al
; AVX512-NEXT:    kmovw %eax, %k1
; AVX512-NEXT:    vmovupd (%rdi), %zmm0 {%k1}
; AVX512-NEXT:    retq
  %res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x double> %dst)
  ret <8 x double> %res
}

; If the pass-through operand is undef, no blend is needed.
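;
; Why no blend is needed: with an undef pass-through, the usual final step
; (an illustrative fragment, not a checked test)
;
;   %res = select <4 x i1> %mask, <4 x double> %loaded, <4 x double> undef
;
; folds to %loaded itself, so the zeroed masked-off lanes that vmaskmovpd
; produces are already a valid result and no vblendvpd is emitted.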

define <4 x double> @mload_constmask_v4f64_undef_passthrough(<4 x double>* %addr) {
; AVX-LABEL: mload_constmask_v4f64_undef_passthrough:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4f64_undef_passthrough:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX512F-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4f64_undef_passthrough:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $7, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> undef)
  ret <4 x double> %res
}

define <4 x i64> @mload_constmask_v4i64_undef_passthrough(<4 x i64>* %addr) {
; AVX1-LABEL: mload_constmask_v4i64_undef_passthrough:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0]
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mload_constmask_v4i64_undef_passthrough:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0]
; AVX2-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4i64_undef_passthrough:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0]
; AVX512F-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4i64_undef_passthrough:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $6, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 0>, <4 x i64> undef)
  ret <4 x i64> %res
}

define void @test21(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
; AVX1-LABEL: test21:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test21:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test21:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test21:
; SKX:       ## BB#0:
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vmovdqu32 %xmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>)
  ret void
}

;  When only one element of the mask is set, reduce to a scalar store.
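;
; A sketch of that reduction (illustrative only, not a checked test; the
; function name is hypothetical): storing just lane 0 of <4 x i32> is one
; unconditional i32 store through the vector base pointer, i.e. the vmovd
; seen below:
;
;   define void @one_bit_store_sketch(<4 x i32>* %addr, <4 x i32> %val) {
;     %elt = extractelement <4 x i32> %val, i32 0
;     %ptr = bitcast <4 x i32>* %addr to i32*
;     store i32 %elt, i32* %ptr, align 4
;     ret void
;   }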

define void @one_mask_bit_set1(<4 x i32>* %addr, <4 x i32> %val) {
; AVX-LABEL: one_mask_bit_set1:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovd %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: one_mask_bit_set1:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vmovd %xmm0, (%rdi)
; AVX512-NEXT:    retq
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>)
  ret void
}

; Choose a different element to show that the correct address offset is produced.
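; The offset is the lane index times the element size: lane 2 of <4 x float>
; lives at 2 * 4 = 8 bytes, hence the 8(%rdi) operand of vextractps below.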

define void @one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) {
; AVX-LABEL: one_mask_bit_set2:
; AVX:       ## BB#0:
; AVX-NEXT:    vextractps $2, %xmm0, 8(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: one_mask_bit_set2:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vextractps $2, %xmm0, 8(%rdi)
; AVX512-NEXT:    retq
  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
  ret void
}

; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.
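; Lane 2 of <4 x i64> sits in the upper 128-bit half of the ymm register, so
; it must be moved down with a 128-bit extract first; the store offset is
; again lane * sizeof(i64) = 2 * 8 = 16 bytes.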

define void @one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
; AVX-LABEL: one_mask_bit_set3:
; AVX:       ## BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vmovlps %xmm0, 16(%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512F-LABEL: one_mask_bit_set3:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, 16(%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: one_mask_bit_set3:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vmovq %xmm0, 16(%rdi)
; SKX-NEXT:    retq
  call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %val, <4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
  ret void
}

; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.

define void @one_mask_bit_set4(<4 x double>* %addr, <4 x double> %val) {
; AVX-LABEL: one_mask_bit_set4:
; AVX:       ## BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vmovhpd %xmm0, 24(%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512F-LABEL: one_mask_bit_set4:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovhpd %xmm0, 24(%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: one_mask_bit_set4:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractf32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vmovhpd %xmm0, 24(%rdi)
; SKX-NEXT:    retq
  call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %val, <4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>)
  ret void
}

; Try a 512-bit vector to make sure AVX doesn't die and AVX512 works as expected.
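; Here the selected lane is 6 of <8 x double>, i.e. byte offset 6 * 8 = 48.
; AVX only has 256-bit vector registers, so the lane comes out of the second
; ymm argument, while AVX512 extracts 128-bit quarter 3 of the zmm directly.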

define void @one_mask_bit_set5(<8 x double>* %addr, <8 x double> %val) {
; AVX-LABEL: one_mask_bit_set5:
; AVX:       ## BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX-NEXT:    vmovlps %xmm0, 48(%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: one_mask_bit_set5:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT:    vmovlpd %xmm0, 48(%rdi)
; AVX512-NEXT:    retq
  call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %val, <8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false>)
  ret void
}

;  When only one element of the mask is set, reduce to a scalar load.
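;
; The scalarized form (an illustrative sketch, not a checked test; the
; function name is hypothetical): loading only lane 0 with %val as the
; pass-through is one scalar load inserted into %val, i.e. the vpinsrd
; seen below:
;
;   define <4 x i32> @one_bit_load_sketch(<4 x i32>* %addr, <4 x i32> %val) {
;     %ptr = bitcast <4 x i32>* %addr to i32*
;     %elt = load i32, i32* %ptr, align 4
;     %res = insertelement <4 x i32> %val, i32 %elt, i32 0
;     ret <4 x i32> %res
;   }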

define <4 x i32> @load_one_mask_bit_set1(<4 x i32>* %addr, <4 x i32> %val) {
; AVX-LABEL: load_one_mask_bit_set1:
; AVX:       ## BB#0:
; AVX-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: load_one_mask_bit_set1:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>, <4 x i32> %val)
  ret <4 x i32> %res
}

; Choose a different element to show that the correct address offset is produced.

define <4 x float> @load_one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) {
; AVX-LABEL: load_one_mask_bit_set2:
; AVX:       ## BB#0:
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    retq
;
; AVX512-LABEL: load_one_mask_bit_set2:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX512-NEXT:    retq
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x float> %val)
  ret <4 x float> %res
}

; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.

define <4 x i64> @load_one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
; AVX1-LABEL: load_one_mask_bit_set3:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpinsrq $0, 16(%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_one_mask_bit_set3:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpinsrq $0, 16(%rdi), %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: load_one_mask_bit_set3:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrq $0, 16(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: load_one_mask_bit_set3:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrq $0, 16(%rdi), %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x i64> %val)
  ret <4 x i64> %res
}

; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.

define <4 x double> @load_one_mask_bit_set4(<4 x double>* %addr, <4 x double> %val) {
; AVX-LABEL: load_one_mask_bit_set4:
; AVX:       ## BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: load_one_mask_bit_set4:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: load_one_mask_bit_set4:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractf32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; SKX-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>, <4 x double> %val)
  ret <4 x double> %res
}

; Try a 512-bit vector to make sure AVX doesn't die and AVX512 works as expected.

define <8 x double> @load_one_mask_bit_set5(<8 x double>* %addr, <8 x double> %val) {
; AVX-LABEL: load_one_mask_bit_set5:
; AVX:       ## BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
; AVX-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: load_one_mask_bit_set5:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
; AVX512-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; AVX512-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x double> %val)
  ret <8 x double> %res
}

declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>)
declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)

declare <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>*, i32, <16 x i1>, <16 x i32*>)

define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
; AVX1-LABEL: test23:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:    vmaskmovpd 96(%rdi), %ymm3, %ymm3
; AVX1-NEXT:    vmaskmovpd 64(%rdi), %ymm2, %ymm2
; AVX1-NEXT:    vmaskmovpd 32(%rdi), %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test23:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX2-NEXT:    vpcmpeqq %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm4, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm4, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm4, %ymm3, %ymm3
; AVX2-NEXT:    vpmaskmovq 96(%rdi), %ymm3, %ymm3
; AVX2-NEXT:    vpmaskmovq 64(%rdi), %ymm2, %ymm2
; AVX2-NEXT:    vpmaskmovq 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test23:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
; AVX512-NEXT:    vpcmpeqq %zmm2, %zmm1, %k2
; AVX512-NEXT:    vmovdqu64 64(%rdi), %zmm1 {%k2} {z}
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %mask = icmp eq <16 x i32*> %trigger, zeroinitializer
  %res = call <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer)
  ret <16 x i32*> %res
}
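
; Pointer elements are 64 bits wide on x86-64, so the <16 x i32*> load above
; legalizes like <16 x i64>: 16 * 8 = 128 bytes, i.e. two masked 512-bit
; loads at offsets 0 and 64 in the AVX512 output.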

%mystruct = type { i16, i16, [1 x i8*] }

declare <16 x %mystruct*> @llvm.masked.load.v16p0mystruct.p0v16p0mystruct(<16 x %mystruct*>*, i32, <16 x i1>, <16 x %mystruct*>)

define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) {
; AVX1-LABEL: test24:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm1, %ymm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vmaskmovpd 96(%rdi), %ymm1, %ymm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vmaskmovpd 64(%rdi), %ymm1, %ymm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmaskmovpd 32(%rdi), %ymm0, %ymm1
; AVX1-NEXT:    vmovapd %ymm4, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test24:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT:    vpmaskmovq (%rdi), %ymm1, %ymm4
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT:    vpmaskmovq 96(%rdi), %ymm1, %ymm3
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT:    vpmaskmovq 64(%rdi), %ymm1, %ymm2
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vpmaskmovq 32(%rdi), %ymm0, %ymm1
; AVX2-NEXT:    vmovdqa %ymm4, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test24:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
; AVX512F-NEXT:    vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test24:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    kshiftrw $8, %k1, %k1
; SKX-NEXT:    vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <16 x %mystruct*> @llvm.masked.load.v16p0mystruct.p0v16p0mystruct(<16 x %mystruct*>* %addr, i32 4, <16 x i1>%mask, <16 x %mystruct*>zeroinitializer)
  ret <16 x %mystruct*> %res
}

define void @test_store_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0)  {
; AVX1-LABEL: test_store_16i64:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm5, %xmm5
; AVX1-NEXT:    vpsrad $31, %xmm5, %xmm5
; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm6
; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
; AVX1-NEXT:    vmaskmovpd %ymm1, %ymm5, (%rdi)
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm5
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm5, %ymm1
; AVX1-NEXT:    vmaskmovpd %ymm4, %ymm1, 96(%rdi)
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
; AVX1-NEXT:    vmaskmovpd %ymm3, %ymm1, 64(%rdi)
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmaskmovpd %ymm2, %ymm0, 32(%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_store_16i64:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm5, %xmm5
; AVX2-NEXT:    vpsrad $31, %xmm5, %xmm5
; AVX2-NEXT:    vpmovsxdq %xmm5, %ymm5
; AVX2-NEXT:    vpmaskmovq %ymm1, %ymm5, (%rdi)
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT:    vpmaskmovq %ymm4, %ymm1, 96(%rdi)
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT:    vpmaskmovq %ymm3, %ymm1, 64(%rdi)
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vpmaskmovq %ymm2, %ymm0, 32(%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_store_16i64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vmovdqu64 %zmm1, (%rdi) {%k1}
; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
; AVX512F-NEXT:    vmovdqu64 %zmm2, 64(%rdi) {%k1}
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_store_16i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vmovdqu64 %zmm1, (%rdi) {%k1}
; SKX-NEXT:    kshiftrw $8, %k1, %k1
; SKX-NEXT:    vmovdqu64 %zmm2, 64(%rdi) {%k1}
; SKX-NEXT:    retq
  call void @llvm.masked.store.v16i64.p0v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32 4, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v16i64.p0v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32, <16 x i1> %mask)

define void @test_store_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0)  {
; AVX1-LABEL: test_store_16f64:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm5, %xmm5
; AVX1-NEXT:    vpsrad $31, %xmm5, %xmm5
; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm6
; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
; AVX1-NEXT:    vmaskmovpd %ymm1, %ymm5, (%rdi)
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm5
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm5, %ymm1
; AVX1-NEXT:    vmaskmovpd %ymm4, %ymm1, 96(%rdi)
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
; AVX1-NEXT:    vmaskmovpd %ymm3, %ymm1, 64(%rdi)
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmaskmovpd %ymm2, %ymm0, 32(%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_store_16f64:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm5, %xmm5
; AVX2-NEXT:    vpsrad $31, %xmm5, %xmm5
; AVX2-NEXT:    vpmovsxdq %xmm5, %ymm5
; AVX2-NEXT:    vmaskmovpd %ymm1, %ymm5, (%rdi)
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT:    vmaskmovpd %ymm4, %ymm1, 96(%rdi)
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT:    vmaskmovpd %ymm3, %ymm1, 64(%rdi)
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vmaskmovpd %ymm2, %ymm0, 32(%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_store_16f64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vmovupd %zmm1, (%rdi) {%k1}
; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
; AVX512F-NEXT:    vmovupd %zmm2, 64(%rdi) {%k1}
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_store_16f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vmovupd %zmm1, (%rdi) {%k1}
; SKX-NEXT:    kshiftrw $8, %k1, %k1
; SKX-NEXT:    vmovupd %zmm2, 64(%rdi) {%k1}
; SKX-NEXT:    retq
  call void @llvm.masked.store.v16f64.p0v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32 4, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v16f64.p0v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32, <16 x i1> %mask)

define <16 x i64> @test_load_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0)  {
; AVX1-LABEL: test_load_16i64:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm5, %xmm5
; AVX1-NEXT:    vpsrad $31, %xmm5, %xmm5
; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm6
; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm5, %ymm6
; AVX1-NEXT:    vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm6
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm6, %ymm1
; AVX1-NEXT:    vmaskmovpd 32(%rdi), %ymm1, %ymm6
; AVX1-NEXT:    vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm6
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm6, %ymm2
; AVX1-NEXT:    vmaskmovpd 64(%rdi), %ymm2, %ymm6
; AVX1-NEXT:    vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vmaskmovpd 96(%rdi), %ymm0, %ymm3
; AVX1-NEXT:    vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
; AVX1-NEXT:    vmovapd %ymm5, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_load_16i64:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm5, %xmm5
; AVX2-NEXT:    vpsrad $31, %xmm5, %xmm5
; AVX2-NEXT:    vpmovsxdq %xmm5, %ymm5
; AVX2-NEXT:    vpmaskmovq (%rdi), %ymm5, %ymm6
; AVX2-NEXT:    vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT:    vpmaskmovq 32(%rdi), %ymm1, %ymm6
; AVX2-NEXT:    vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX2-NEXT:    vpsrad $31, %xmm2, %xmm2
; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT:    vpmaskmovq 64(%rdi), %ymm2, %ymm6
; AVX2-NEXT:    vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vpmaskmovq 96(%rdi), %ymm0, %ymm3
; AVX2-NEXT:    vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
; AVX2-NEXT:    vmovapd %ymm5, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_load_16i64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm1 {%k1}
; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
; AVX512F-NEXT:    vmovdqu64 64(%rdi), %zmm2 {%k1}
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    vmovaps %zmm2, %zmm1
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_load_16i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm1 {%k1}
; SKX-NEXT:    kshiftrw $8, %k1, %k1
; SKX-NEXT:    vmovdqu64 64(%rdi), %zmm2 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    vmovaps %zmm2, %zmm1
; SKX-NEXT:    retq
  %res = call <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
  ret <16 x i64> %res
}
declare <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)

define <16 x double> @test_load_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0)  {
; AVX1-LABEL: test_load_16f64:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm5, %xmm5
; AVX1-NEXT:    vpsrad $31, %xmm5, %xmm5
; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm6
; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm5, %ymm6
; AVX1-NEXT:    vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm6
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm6, %ymm1
; AVX1-NEXT:    vmaskmovpd 32(%rdi), %ymm1, %ymm6
; AVX1-NEXT:    vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm6
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm6, %ymm2
; AVX1-NEXT:    vmaskmovpd 64(%rdi), %ymm2, %ymm6
; AVX1-NEXT:    vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vmaskmovpd 96(%rdi), %ymm0, %ymm3
; AVX1-NEXT:    vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
; AVX1-NEXT:    vmovapd %ymm5, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_load_16f64:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm5, %xmm5
; AVX2-NEXT:    vpsrad $31, %xmm5, %xmm5
; AVX2-NEXT:    vpmovsxdq %xmm5, %ymm5
; AVX2-NEXT:    vmaskmovpd (%rdi), %ymm5, %ymm6
; AVX2-NEXT:    vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT:    vmaskmovpd 32(%rdi), %ymm1, %ymm6
; AVX2-NEXT:    vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX2-NEXT:    vpsrad $31, %xmm2, %xmm2
; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT:    vmaskmovpd 64(%rdi), %ymm2, %ymm6
; AVX2-NEXT:    vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vmaskmovpd 96(%rdi), %ymm0, %ymm3
; AVX2-NEXT:    vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
; AVX2-NEXT:    vmovapd %ymm5, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_load_16f64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vmovupd (%rdi), %zmm1 {%k1}
; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
; AVX512F-NEXT:    vmovupd 64(%rdi), %zmm2 {%k1}
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    vmovaps %zmm2, %zmm1
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_load_16f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vmovupd (%rdi), %zmm1 {%k1}
; SKX-NEXT:    kshiftrw $8, %k1, %k1
; SKX-NEXT:    vmovupd 64(%rdi), %zmm2 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    vmovaps %zmm2, %zmm1
; SKX-NEXT:    retq
  %res = call <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
  ret <16 x double> %res
}
declare <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)

define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0)  {
; AVX1-LABEL: test_load_32f64:
; AVX1:       ## BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:  Ltmp0:
; AVX1-NEXT:    .cfi_def_cfa_offset 16
; AVX1-NEXT:  Ltmp1:
; AVX1-NEXT:    .cfi_offset %rbp, -16
; AVX1-NEXT:    movq %rsp, %rbp
; AVX1-NEXT:  Ltmp2:
; AVX1-NEXT:    .cfi_def_cfa_register %rbp
; AVX1-NEXT:    andq $-32, %rsp
; AVX1-NEXT:    subq $32, %rsp
; AVX1-NEXT:    vmovapd 16(%rbp), %ymm8
; AVX1-NEXT:    vpshufd {{.*#+}} xmm9 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm9, %xmm9
; AVX1-NEXT:    vpsrad $31, %xmm9, %xmm9
; AVX1-NEXT:    vpmovsxdq %xmm9, %xmm10
; AVX1-NEXT:    vpshufd {{.*#+}} xmm9 = xmm9[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm9, %xmm9
; AVX1-NEXT:    vinsertf128 $1, %xmm9, %ymm10, %ymm9
; AVX1-NEXT:    vmaskmovpd 32(%rsi), %ymm9, %ymm10
; AVX1-NEXT:    vblendvpd %ymm9, %ymm10, %ymm2, %ymm9
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm10
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm10, %ymm2
; AVX1-NEXT:    vmaskmovpd 64(%rsi), %ymm2, %ymm10
; AVX1-NEXT:    vblendvpd %ymm2, %ymm10, %ymm3, %ymm11
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm10
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm10, %ymm2
; AVX1-NEXT:    vmaskmovpd 96(%rsi), %ymm2, %ymm10
; AVX1-NEXT:    vblendvpd %ymm2, %ymm10, %ymm4, %ymm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm3, %xmm3
; AVX1-NEXT:    vpsrad $31, %xmm3, %xmm3
; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm10
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm10, %ymm3
; AVX1-NEXT:    vmaskmovpd 160(%rsi), %ymm3, %ymm10
; AVX1-NEXT:    vblendvpd %ymm3, %ymm10, %ymm6, %ymm6
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm3, %xmm3
; AVX1-NEXT:    vpsrad $31, %xmm3, %xmm3
; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm10
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm10, %ymm3
; AVX1-NEXT:    vmaskmovpd 192(%rsi), %ymm3, %ymm10
; AVX1-NEXT:    vblendvpd %ymm3, %ymm10, %ymm7, %ymm7
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[3,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm3, %xmm3
; AVX1-NEXT:    vpsrad $31, %xmm3, %xmm3
; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm10
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm10, %ymm3
; AVX1-NEXT:    vmaskmovpd 224(%rsi), %ymm3, %ymm10
; AVX1-NEXT:    vblendvpd %ymm3, %ymm10, %ymm8, %ymm3
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm8
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm8, %ymm0
; AVX1-NEXT:    vmaskmovpd (%rsi), %ymm0, %ymm8
; AVX1-NEXT:    vblendvpd %ymm0, %ymm8, %ymm1, %ymm0
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vmaskmovpd 128(%rsi), %ymm1, %ymm2
; AVX1-NEXT:    vblendvpd %ymm1, %ymm2, %ymm5, %ymm1
; AVX1-NEXT:    vmovapd %ymm1, 128(%rdi)
; AVX1-NEXT:    vmovapd %ymm0, (%rdi)
; AVX1-NEXT:    vmovapd %ymm3, 224(%rdi)
; AVX1-NEXT:    vmovapd %ymm7, 192(%rdi)
; AVX1-NEXT:    vmovapd %ymm6, 160(%rdi)
; AVX1-NEXT:    vmovapd %ymm4, 96(%rdi)
; AVX1-NEXT:    vmovapd %ymm11, 64(%rdi)
; AVX1-NEXT:    vmovapd %ymm9, 32(%rdi)
; AVX1-NEXT:    movq %rdi, %rax
; AVX1-NEXT:    movq %rbp, %rsp
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_load_32f64:
; AVX2:       ## BB#0:
; AVX2-NEXT:    pushq %rbp
; AVX2-NEXT:  Ltmp0:
; AVX2-NEXT:    .cfi_def_cfa_offset 16
; AVX2-NEXT:  Ltmp1:
; AVX2-NEXT:    .cfi_offset %rbp, -16
; AVX2-NEXT:    movq %rsp, %rbp
; AVX2-NEXT:  Ltmp2:
; AVX2-NEXT:    .cfi_def_cfa_register %rbp
; AVX2-NEXT:    andq $-32, %rsp
; AVX2-NEXT:    subq $32, %rsp
; AVX2-NEXT:    vmovapd 16(%rbp), %ymm8
; AVX2-NEXT:    vpshufd {{.*#+}} xmm9 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm9, %xmm9
; AVX2-NEXT:    vpsrad $31, %xmm9, %xmm9
; AVX2-NEXT:    vpmovsxdq %xmm9, %ymm9
; AVX2-NEXT:    vmaskmovpd 32(%rsi), %ymm9, %ymm10
; AVX2-NEXT:    vblendvpd %ymm9, %ymm10, %ymm2, %ymm9
; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX2-NEXT:    vpsrad $31, %xmm2, %xmm2
; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT:    vmaskmovpd 64(%rsi), %ymm2, %ymm10
; AVX2-NEXT:    vblendvpd %ymm2, %ymm10, %ymm3, %ymm11
; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX2-NEXT:    vpsrad $31, %xmm2, %xmm2
; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT:    vmaskmovpd 96(%rsi), %ymm2, %ymm10
; AVX2-NEXT:    vblendvpd %ymm2, %ymm10, %ymm4, %ymm4
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm3, %xmm3
; AVX2-NEXT:    vpsrad $31, %xmm3, %xmm3
; AVX2-NEXT:    vpmovsxdq %xmm3, %ymm3
; AVX2-NEXT:    vmaskmovpd 160(%rsi), %ymm3, %ymm10
; AVX2-NEXT:    vblendvpd %ymm3, %ymm10, %ymm6, %ymm3
; AVX2-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm6, %xmm6
; AVX2-NEXT:    vpsrad $31, %xmm6, %xmm6
; AVX2-NEXT:    vpmovsxdq %xmm6, %ymm6
; AVX2-NEXT:    vmaskmovpd 192(%rsi), %ymm6, %ymm10
; AVX2-NEXT:    vblendvpd %ymm6, %ymm10, %ymm7, %ymm6
; AVX2-NEXT:    vpshufd {{.*#+}} xmm7 = xmm2[3,1,2,3]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; AVX2-NEXT:    vpslld $31, %xmm7, %xmm7
; AVX2-NEXT:    vpsrad $31, %xmm7, %xmm7
; AVX2-NEXT:    vpmovsxdq %xmm7, %ymm7
; AVX2-NEXT:    vmaskmovpd 224(%rsi), %ymm7, %ymm10
						|
; AVX2-NEXT:    vblendvpd %ymm7, %ymm10, %ymm8, %ymm7
 | 
						|
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 | 
						|
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
 | 
						|
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
 | 
						|
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
 | 
						|
; AVX2-NEXT:    vmaskmovpd (%rsi), %ymm0, %ymm8
 | 
						|
; AVX2-NEXT:    vblendvpd %ymm0, %ymm8, %ymm1, %ymm0
 | 
						|
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
 | 
						|
; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
 | 
						|
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
 | 
						|
; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
 | 
						|
; AVX2-NEXT:    vmaskmovpd 128(%rsi), %ymm1, %ymm2
 | 
						|
; AVX2-NEXT:    vblendvpd %ymm1, %ymm2, %ymm5, %ymm1
 | 
						|
; AVX2-NEXT:    vmovapd %ymm1, 128(%rdi)
 | 
						|
; AVX2-NEXT:    vmovapd %ymm0, (%rdi)
 | 
						|
; AVX2-NEXT:    vmovapd %ymm7, 224(%rdi)
 | 
						|
; AVX2-NEXT:    vmovapd %ymm6, 192(%rdi)
 | 
						|
; AVX2-NEXT:    vmovapd %ymm3, 160(%rdi)
 | 
						|
; AVX2-NEXT:    vmovapd %ymm4, 96(%rdi)
 | 
						|
; AVX2-NEXT:    vmovapd %ymm11, 64(%rdi)
 | 
						|
; AVX2-NEXT:    vmovapd %ymm9, 32(%rdi)
 | 
						|
; AVX2-NEXT:    movq %rdi, %rax
 | 
						|
; AVX2-NEXT:    movq %rbp, %rsp
 | 
						|
; AVX2-NEXT:    popq %rbp
 | 
						|
; AVX2-NEXT:    vzeroupper
 | 
						|
; AVX2-NEXT:    retq
 | 
						|
;
 | 
						|
; AVX512F-LABEL: test_load_32f64:
 | 
						|
; AVX512F:       ## BB#0:
 | 
						|
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm5
 | 
						|
; AVX512F-NEXT:    vpmovsxbd %xmm5, %zmm5
 | 
						|
; AVX512F-NEXT:    vpslld $31, %zmm5, %zmm5
 | 
						|
; AVX512F-NEXT:    vptestmd %zmm5, %zmm5, %k1
 | 
						|
; AVX512F-NEXT:    vmovupd 128(%rdi), %zmm3 {%k1}
 | 
						|
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
 | 
						|
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
 | 
						|
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k2
 | 
						|
; AVX512F-NEXT:    vmovupd (%rdi), %zmm1 {%k2}
 | 
						|
; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
 | 
						|
; AVX512F-NEXT:    vmovupd 192(%rdi), %zmm4 {%k1}
 | 
						|
; AVX512F-NEXT:    kshiftrw $8, %k2, %k1
 | 
						|
; AVX512F-NEXT:    vmovupd 64(%rdi), %zmm2 {%k1}
 | 
						|
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
 | 
						|
; AVX512F-NEXT:    vmovaps %zmm2, %zmm1
 | 
						|
; AVX512F-NEXT:    vmovaps %zmm3, %zmm2
 | 
						|
; AVX512F-NEXT:    vmovaps %zmm4, %zmm3
 | 
						|
; AVX512F-NEXT:    retq
 | 
						|
;
 | 
						|
; SKX-LABEL: test_load_32f64:
 | 
						|
; SKX:       ## BB#0:
 | 
						|
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
 | 
						|
; SKX-NEXT:    vpmovb2m %ymm0, %k1
 | 
						|
; SKX-NEXT:    vmovupd (%rdi), %zmm1 {%k1}
 | 
						|
; SKX-NEXT:    kshiftrd $16, %k1, %k2
 | 
						|
; SKX-NEXT:    vmovupd 128(%rdi), %zmm3 {%k2}
 | 
						|
; SKX-NEXT:    kshiftrw $8, %k1, %k1
 | 
						|
; SKX-NEXT:    vmovupd 64(%rdi), %zmm2 {%k1}
 | 
						|
; SKX-NEXT:    kshiftrw $8, %k2, %k1
 | 
						|
; SKX-NEXT:    vmovupd 192(%rdi), %zmm4 {%k1}
 | 
						|
; SKX-NEXT:    vmovaps %zmm1, %zmm0
 | 
						|
; SKX-NEXT:    vmovaps %zmm2, %zmm1
 | 
						|
; SKX-NEXT:    vmovaps %zmm3, %zmm2
 | 
						|
; SKX-NEXT:    vmovaps %zmm4, %zmm3
 | 
						|
; SKX-NEXT:    retq
 | 
						|
  %res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
 | 
						|
  ret <32 x double> %res
 | 
						|
}
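
; Note the per-target strategies checked above: AVX1/AVX2 widen each 4-bit
; slice of the i1 mask (vpmovzxbd + vpslld/vpsrad, then vpmovsxdq) into a ymm
; mask for vmaskmovpd and merge the passthru operand with vblendvpd; AVX512F
; widens the mask with vpmovsxbd/vptestmd and peels off 8-bit slices with
; kshiftrw; SKX can form the whole mask at once with vpsllw + vpmovb2m.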
declare <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
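
; AVX and AVX512F have no byte-granularity masked move, so the <16 x i8> masked
; load below is scalarized: each mask bit is extracted (vpextrb, or
; kshiftlw/kshiftrw + kmovw on AVX512F), tested, and the byte conditionally
; inserted with vpinsrb. Only SKX (AVX512BW+VL) can select vpmovb2m + vmovdqu8
; directly.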
define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; AVX-LABEL: test_mask_load_16xi8:
; AVX:       ## BB#0:
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    ## implicit-def: %XMM1
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_2
; AVX-NEXT:  ## BB#1: ## %cond.load
; AVX-NEXT:    movzbl (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm1
; AVX-NEXT:  LBB50_2: ## %else
; AVX-NEXT:    vpextrb $1, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_4
; AVX-NEXT:  ## BB#3: ## %cond.load1
; AVX-NEXT:    vpinsrb $1, 1(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_4: ## %else2
; AVX-NEXT:    vpextrb $2, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_6
; AVX-NEXT:  ## BB#5: ## %cond.load4
; AVX-NEXT:    vpinsrb $2, 2(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_6: ## %else5
; AVX-NEXT:    vpextrb $3, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_8
; AVX-NEXT:  ## BB#7: ## %cond.load7
; AVX-NEXT:    vpinsrb $3, 3(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_8: ## %else8
; AVX-NEXT:    vpextrb $4, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_10
; AVX-NEXT:  ## BB#9: ## %cond.load10
; AVX-NEXT:    vpinsrb $4, 4(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_10: ## %else11
; AVX-NEXT:    vpextrb $5, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_12
; AVX-NEXT:  ## BB#11: ## %cond.load13
; AVX-NEXT:    vpinsrb $5, 5(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_12: ## %else14
; AVX-NEXT:    vpextrb $6, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_14
; AVX-NEXT:  ## BB#13: ## %cond.load16
; AVX-NEXT:    vpinsrb $6, 6(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_14: ## %else17
; AVX-NEXT:    vpextrb $7, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_16
; AVX-NEXT:  ## BB#15: ## %cond.load19
; AVX-NEXT:    vpinsrb $7, 7(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_16: ## %else20
; AVX-NEXT:    vpextrb $8, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_18
; AVX-NEXT:  ## BB#17: ## %cond.load22
; AVX-NEXT:    vpinsrb $8, 8(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_18: ## %else23
; AVX-NEXT:    vpextrb $9, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_20
; AVX-NEXT:  ## BB#19: ## %cond.load25
; AVX-NEXT:    vpinsrb $9, 9(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_20: ## %else26
; AVX-NEXT:    vpextrb $10, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_22
; AVX-NEXT:  ## BB#21: ## %cond.load28
; AVX-NEXT:    vpinsrb $10, 10(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_22: ## %else29
; AVX-NEXT:    vpextrb $11, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_24
; AVX-NEXT:  ## BB#23: ## %cond.load31
; AVX-NEXT:    vpinsrb $11, 11(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_24: ## %else32
; AVX-NEXT:    vpextrb $12, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_26
; AVX-NEXT:  ## BB#25: ## %cond.load34
; AVX-NEXT:    vpinsrb $12, 12(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_26: ## %else35
; AVX-NEXT:    vpextrb $13, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_28
; AVX-NEXT:  ## BB#27: ## %cond.load37
; AVX-NEXT:    vpinsrb $13, 13(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_28: ## %else38
; AVX-NEXT:    vpextrb $14, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_30
; AVX-NEXT:  ## BB#29: ## %cond.load40
; AVX-NEXT:    vpinsrb $14, 14(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_30: ## %else41
; AVX-NEXT:    vpextrb $15, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB50_32
; AVX-NEXT:  ## BB#31: ## %cond.load43
; AVX-NEXT:    vpinsrb $15, 15(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB50_32: ## %else44
; AVX-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpblendvb %xmm0, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: test_mask_load_16xi8:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    ## implicit-def: %XMM0
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_2
; AVX512F-NEXT:  ## BB#1: ## %cond.load
; AVX512F-NEXT:    movzbl (%rdi), %eax
; AVX512F-NEXT:    vmovd %eax, %xmm0
; AVX512F-NEXT:  LBB50_2: ## %else
; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_4
; AVX512F-NEXT:  ## BB#3: ## %cond.load1
; AVX512F-NEXT:    vpinsrb $1, 1(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_4: ## %else2
; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_6
; AVX512F-NEXT:  ## BB#5: ## %cond.load4
; AVX512F-NEXT:    vpinsrb $2, 2(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_6: ## %else5
; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_8
; AVX512F-NEXT:  ## BB#7: ## %cond.load7
; AVX512F-NEXT:    vpinsrb $3, 3(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_8: ## %else8
; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_10
; AVX512F-NEXT:  ## BB#9: ## %cond.load10
; AVX512F-NEXT:    vpinsrb $4, 4(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_10: ## %else11
; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_12
; AVX512F-NEXT:  ## BB#11: ## %cond.load13
; AVX512F-NEXT:    vpinsrb $5, 5(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_12: ## %else14
; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_14
; AVX512F-NEXT:  ## BB#13: ## %cond.load16
; AVX512F-NEXT:    vpinsrb $6, 6(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_14: ## %else17
; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_16
; AVX512F-NEXT:  ## BB#15: ## %cond.load19
; AVX512F-NEXT:    vpinsrb $7, 7(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_16: ## %else20
; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_18
; AVX512F-NEXT:  ## BB#17: ## %cond.load22
; AVX512F-NEXT:    vpinsrb $8, 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_18: ## %else23
; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_20
; AVX512F-NEXT:  ## BB#19: ## %cond.load25
; AVX512F-NEXT:    vpinsrb $9, 9(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_20: ## %else26
; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_22
; AVX512F-NEXT:  ## BB#21: ## %cond.load28
; AVX512F-NEXT:    vpinsrb $10, 10(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_22: ## %else29
; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_24
; AVX512F-NEXT:  ## BB#23: ## %cond.load31
; AVX512F-NEXT:    vpinsrb $11, 11(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_24: ## %else32
; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_26
; AVX512F-NEXT:  ## BB#25: ## %cond.load34
; AVX512F-NEXT:    vpinsrb $12, 12(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_26: ## %else35
; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_28
; AVX512F-NEXT:  ## BB#27: ## %cond.load37
; AVX512F-NEXT:    vpinsrb $13, 13(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_28: ## %else38
; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_30
; AVX512F-NEXT:  ## BB#29: ## %cond.load40
; AVX512F-NEXT:    vpinsrb $14, 14(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_30: ## %else41
; AVX512F-NEXT:    kshiftlw $0, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB50_32
; AVX512F-NEXT:  ## BB#31: ## %cond.load43
; AVX512F-NEXT:    vpinsrb $15, 15(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB50_32: ## %else44
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpblendvb %xmm1, %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_load_16xi8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vmovdqu8 (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
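
; The <32 x i8> case below scalarizes the same way, but for the upper 16 lanes
; AVX1/AVX2/AVX512F must also extract and reinsert the high 128-bit half
; (vextractf128/vinsertf128 or vextracti128/vinserti128) around every
; conditional vpinsrb.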
define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; AVX1-LABEL: test_mask_load_32xi8:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    ## implicit-def: %YMM1
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_2
; AVX1-NEXT:  ## BB#1: ## %cond.load
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:  LBB51_2: ## %else
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_4
; AVX1-NEXT:  ## BB#3: ## %cond.load1
; AVX1-NEXT:    vpinsrb $1, 1(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_4: ## %else2
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_6
; AVX1-NEXT:  ## BB#5: ## %cond.load4
; AVX1-NEXT:    vpinsrb $2, 2(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_6: ## %else5
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_8
; AVX1-NEXT:  ## BB#7: ## %cond.load7
; AVX1-NEXT:    vpinsrb $3, 3(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_8: ## %else8
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_10
; AVX1-NEXT:  ## BB#9: ## %cond.load10
; AVX1-NEXT:    vpinsrb $4, 4(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_10: ## %else11
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_12
; AVX1-NEXT:  ## BB#11: ## %cond.load13
; AVX1-NEXT:    vpinsrb $5, 5(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_12: ## %else14
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_14
; AVX1-NEXT:  ## BB#13: ## %cond.load16
; AVX1-NEXT:    vpinsrb $6, 6(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_14: ## %else17
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_16
; AVX1-NEXT:  ## BB#15: ## %cond.load19
; AVX1-NEXT:    vpinsrb $7, 7(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_16: ## %else20
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_18
; AVX1-NEXT:  ## BB#17: ## %cond.load22
; AVX1-NEXT:    vpinsrb $8, 8(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_18: ## %else23
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_20
; AVX1-NEXT:  ## BB#19: ## %cond.load25
; AVX1-NEXT:    vpinsrb $9, 9(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_20: ## %else26
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_22
; AVX1-NEXT:  ## BB#21: ## %cond.load28
; AVX1-NEXT:    vpinsrb $10, 10(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_22: ## %else29
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_24
; AVX1-NEXT:  ## BB#23: ## %cond.load31
; AVX1-NEXT:    vpinsrb $11, 11(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_24: ## %else32
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_26
; AVX1-NEXT:  ## BB#25: ## %cond.load34
; AVX1-NEXT:    vpinsrb $12, 12(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_26: ## %else35
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_28
; AVX1-NEXT:  ## BB#27: ## %cond.load37
; AVX1-NEXT:    vpinsrb $13, 13(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_28: ## %else38
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_30
; AVX1-NEXT:  ## BB#29: ## %cond.load40
; AVX1-NEXT:    vpinsrb $14, 14(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_30: ## %else41
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_32
; AVX1-NEXT:  ## BB#31: ## %cond.load43
; AVX1-NEXT:    vpinsrb $15, 15(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_32: ## %else44
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $0, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_34
; AVX1-NEXT:  ## BB#33: ## %cond.load46
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $0, 16(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_34: ## %else47
; AVX1-NEXT:    vpextrb $1, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_36
; AVX1-NEXT:  ## BB#35: ## %cond.load49
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $1, 17(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_36: ## %else50
; AVX1-NEXT:    vpextrb $2, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_38
; AVX1-NEXT:  ## BB#37: ## %cond.load52
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $2, 18(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_38: ## %else53
; AVX1-NEXT:    vpextrb $3, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_40
; AVX1-NEXT:  ## BB#39: ## %cond.load55
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $3, 19(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_40: ## %else56
; AVX1-NEXT:    vpextrb $4, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_42
; AVX1-NEXT:  ## BB#41: ## %cond.load58
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $4, 20(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_42: ## %else59
; AVX1-NEXT:    vpextrb $5, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_44
; AVX1-NEXT:  ## BB#43: ## %cond.load61
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $5, 21(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_44: ## %else62
; AVX1-NEXT:    vpextrb $6, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_46
; AVX1-NEXT:  ## BB#45: ## %cond.load64
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $6, 22(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_46: ## %else65
; AVX1-NEXT:    vpextrb $7, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_48
; AVX1-NEXT:  ## BB#47: ## %cond.load67
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $7, 23(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_48: ## %else68
; AVX1-NEXT:    vpextrb $8, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_50
; AVX1-NEXT:  ## BB#49: ## %cond.load70
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $8, 24(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_50: ## %else71
; AVX1-NEXT:    vpextrb $9, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_52
; AVX1-NEXT:  ## BB#51: ## %cond.load73
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $9, 25(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_52: ## %else74
; AVX1-NEXT:    vpextrb $10, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_54
; AVX1-NEXT:  ## BB#53: ## %cond.load76
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $10, 26(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_54: ## %else77
; AVX1-NEXT:    vpextrb $11, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_56
; AVX1-NEXT:  ## BB#55: ## %cond.load79
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $11, 27(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_56: ## %else80
; AVX1-NEXT:    vpextrb $12, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_58
; AVX1-NEXT:  ## BB#57: ## %cond.load82
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $12, 28(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_58: ## %else83
; AVX1-NEXT:    vpextrb $13, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_60
; AVX1-NEXT:  ## BB#59: ## %cond.load85
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $13, 29(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_60: ## %else86
; AVX1-NEXT:    vpextrb $14, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_62
; AVX1-NEXT:  ## BB#61: ## %cond.load88
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $14, 30(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_62: ## %else89
; AVX1-NEXT:    vpextrb $15, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_64
; AVX1-NEXT:  ## BB#63: ## %cond.load91
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $15, 31(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_64: ## %else92
; AVX1-NEXT:    vpsllw $7, %xmm2, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_load_32xi8:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    ## implicit-def: %YMM1
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_2
; AVX2-NEXT:  ## BB#1: ## %cond.load
; AVX2-NEXT:    movzbl (%rdi), %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:  LBB51_2: ## %else
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_4
; AVX2-NEXT:  ## BB#3: ## %cond.load1
; AVX2-NEXT:    vpinsrb $1, 1(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_4: ## %else2
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_6
; AVX2-NEXT:  ## BB#5: ## %cond.load4
; AVX2-NEXT:    vpinsrb $2, 2(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_6: ## %else5
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_8
; AVX2-NEXT:  ## BB#7: ## %cond.load7
; AVX2-NEXT:    vpinsrb $3, 3(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_8: ## %else8
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_10
; AVX2-NEXT:  ## BB#9: ## %cond.load10
; AVX2-NEXT:    vpinsrb $4, 4(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_10: ## %else11
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_12
; AVX2-NEXT:  ## BB#11: ## %cond.load13
; AVX2-NEXT:    vpinsrb $5, 5(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_12: ## %else14
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_14
; AVX2-NEXT:  ## BB#13: ## %cond.load16
; AVX2-NEXT:    vpinsrb $6, 6(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_14: ## %else17
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_16
; AVX2-NEXT:  ## BB#15: ## %cond.load19
; AVX2-NEXT:    vpinsrb $7, 7(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_16: ## %else20
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_18
; AVX2-NEXT:  ## BB#17: ## %cond.load22
; AVX2-NEXT:    vpinsrb $8, 8(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_18: ## %else23
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_20
; AVX2-NEXT:  ## BB#19: ## %cond.load25
; AVX2-NEXT:    vpinsrb $9, 9(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_20: ## %else26
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_22
; AVX2-NEXT:  ## BB#21: ## %cond.load28
; AVX2-NEXT:    vpinsrb $10, 10(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_22: ## %else29
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_24
; AVX2-NEXT:  ## BB#23: ## %cond.load31
; AVX2-NEXT:    vpinsrb $11, 11(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_24: ## %else32
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_26
; AVX2-NEXT:  ## BB#25: ## %cond.load34
; AVX2-NEXT:    vpinsrb $12, 12(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_26: ## %else35
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_28
; AVX2-NEXT:  ## BB#27: ## %cond.load37
; AVX2-NEXT:    vpinsrb $13, 13(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_28: ## %else38
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_30
; AVX2-NEXT:  ## BB#29: ## %cond.load40
; AVX2-NEXT:    vpinsrb $14, 14(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_30: ## %else41
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_32
; AVX2-NEXT:  ## BB#31: ## %cond.load43
; AVX2-NEXT:    vpinsrb $15, 15(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_32: ## %else44
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $0, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_34
; AVX2-NEXT:  ## BB#33: ## %cond.load46
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $0, 16(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_34: ## %else47
; AVX2-NEXT:    vpextrb $1, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_36
; AVX2-NEXT:  ## BB#35: ## %cond.load49
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $1, 17(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_36: ## %else50
; AVX2-NEXT:    vpextrb $2, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_38
; AVX2-NEXT:  ## BB#37: ## %cond.load52
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $2, 18(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_38: ## %else53
; AVX2-NEXT:    vpextrb $3, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_40
; AVX2-NEXT:  ## BB#39: ## %cond.load55
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $3, 19(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_40: ## %else56
; AVX2-NEXT:    vpextrb $4, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_42
; AVX2-NEXT:  ## BB#41: ## %cond.load58
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $4, 20(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_42: ## %else59
; AVX2-NEXT:    vpextrb $5, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_44
; AVX2-NEXT:  ## BB#43: ## %cond.load61
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $5, 21(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_44: ## %else62
; AVX2-NEXT:    vpextrb $6, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_46
; AVX2-NEXT:  ## BB#45: ## %cond.load64
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $6, 22(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_46: ## %else65
; AVX2-NEXT:    vpextrb $7, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_48
; AVX2-NEXT:  ## BB#47: ## %cond.load67
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $7, 23(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_48: ## %else68
; AVX2-NEXT:    vpextrb $8, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_50
; AVX2-NEXT:  ## BB#49: ## %cond.load70
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $8, 24(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_50: ## %else71
; AVX2-NEXT:    vpextrb $9, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_52
; AVX2-NEXT:  ## BB#51: ## %cond.load73
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $9, 25(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_52: ## %else74
; AVX2-NEXT:    vpextrb $10, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_54
; AVX2-NEXT:  ## BB#53: ## %cond.load76
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $10, 26(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_54: ## %else77
; AVX2-NEXT:    vpextrb $11, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_56
; AVX2-NEXT:  ## BB#55: ## %cond.load79
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $11, 27(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_56: ## %else80
; AVX2-NEXT:    vpextrb $12, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_58
; AVX2-NEXT:  ## BB#57: ## %cond.load82
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $12, 28(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_58: ## %else83
; AVX2-NEXT:    vpextrb $13, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_60
; AVX2-NEXT:  ## BB#59: ## %cond.load85
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $13, 29(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_60: ## %else86
; AVX2-NEXT:    vpextrb $14, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_62
; AVX2-NEXT:  ## BB#61: ## %cond.load88
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $14, 30(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_62: ## %else89
; AVX2-NEXT:    vpextrb $15, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_64
; AVX2-NEXT:  ## BB#63: ## %cond.load91
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrb $15, 31(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_64: ## %else92
; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_load_32xi8:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512F-NEXT:    ## implicit-def: %YMM1
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_2
; AVX512F-NEXT:  ## BB#1: ## %cond.load
; AVX512F-NEXT:    movzbl (%rdi), %eax
; AVX512F-NEXT:    vmovd %eax, %xmm1
; AVX512F-NEXT:  LBB51_2: ## %else
; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_4
; AVX512F-NEXT:  ## BB#3: ## %cond.load1
; AVX512F-NEXT:    vpinsrb $1, 1(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_4: ## %else2
; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_6
; AVX512F-NEXT:  ## BB#5: ## %cond.load4
; AVX512F-NEXT:    vpinsrb $2, 2(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_6: ## %else5
; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_8
; AVX512F-NEXT:  ## BB#7: ## %cond.load7
; AVX512F-NEXT:    vpinsrb $3, 3(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_8: ## %else8
; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_10
; AVX512F-NEXT:  ## BB#9: ## %cond.load10
; AVX512F-NEXT:    vpinsrb $4, 4(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_10: ## %else11
; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_12
; AVX512F-NEXT:  ## BB#11: ## %cond.load13
; AVX512F-NEXT:    vpinsrb $5, 5(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_12: ## %else14
; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_14
; AVX512F-NEXT:  ## BB#13: ## %cond.load16
; AVX512F-NEXT:    vpinsrb $6, 6(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_14: ## %else17
; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_16
; AVX512F-NEXT:  ## BB#15: ## %cond.load19
; AVX512F-NEXT:    vpinsrb $7, 7(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_16: ## %else20
; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_18
; AVX512F-NEXT:  ## BB#17: ## %cond.load22
; AVX512F-NEXT:    vpinsrb $8, 8(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_18: ## %else23
; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_20
; AVX512F-NEXT:  ## BB#19: ## %cond.load25
; AVX512F-NEXT:    vpinsrb $9, 9(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_20: ## %else26
; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_22
; AVX512F-NEXT:  ## BB#21: ## %cond.load28
; AVX512F-NEXT:    vpinsrb $10, 10(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_22: ## %else29
; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_24
; AVX512F-NEXT:  ## BB#23: ## %cond.load31
; AVX512F-NEXT:    vpinsrb $11, 11(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_24: ## %else32
; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_26
; AVX512F-NEXT:  ## BB#25: ## %cond.load34
; AVX512F-NEXT:    vpinsrb $12, 12(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_26: ## %else35
; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_28
; AVX512F-NEXT:  ## BB#27: ## %cond.load37
; AVX512F-NEXT:    vpinsrb $13, 13(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_28: ## %else38
; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_30
; AVX512F-NEXT:  ## BB#29: ## %cond.load40
; AVX512F-NEXT:    vpinsrb $14, 14(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_30: ## %else41
; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_32
; AVX512F-NEXT:  ## BB#31: ## %cond.load43
; AVX512F-NEXT:    vpinsrb $15, 15(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_32: ## %else44
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512F-NEXT:    vpextrb $0, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_34
; AVX512F-NEXT:  ## BB#33: ## %cond.load46
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $0, 16(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_34: ## %else47
; AVX512F-NEXT:    vpextrb $1, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_36
; AVX512F-NEXT:  ## BB#35: ## %cond.load49
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $1, 17(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_36: ## %else50
; AVX512F-NEXT:    vpextrb $2, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_38
; AVX512F-NEXT:  ## BB#37: ## %cond.load52
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $2, 18(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_38: ## %else53
; AVX512F-NEXT:    vpextrb $3, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_40
; AVX512F-NEXT:  ## BB#39: ## %cond.load55
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $3, 19(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_40: ## %else56
; AVX512F-NEXT:    vpextrb $4, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_42
; AVX512F-NEXT:  ## BB#41: ## %cond.load58
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $4, 20(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_42: ## %else59
; AVX512F-NEXT:    vpextrb $5, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_44
; AVX512F-NEXT:  ## BB#43: ## %cond.load61
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $5, 21(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_44: ## %else62
; AVX512F-NEXT:    vpextrb $6, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_46
; AVX512F-NEXT:  ## BB#45: ## %cond.load64
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $6, 22(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_46: ## %else65
; AVX512F-NEXT:    vpextrb $7, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_48
; AVX512F-NEXT:  ## BB#47: ## %cond.load67
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $7, 23(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_48: ## %else68
; AVX512F-NEXT:    vpextrb $8, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_50
; AVX512F-NEXT:  ## BB#49: ## %cond.load70
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $8, 24(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_50: ## %else71
; AVX512F-NEXT:    vpextrb $9, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_52
; AVX512F-NEXT:  ## BB#51: ## %cond.load73
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $9, 25(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_52: ## %else74
; AVX512F-NEXT:    vpextrb $10, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_54
; AVX512F-NEXT:  ## BB#53: ## %cond.load76
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $10, 26(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_54: ## %else77
; AVX512F-NEXT:    vpextrb $11, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_56
; AVX512F-NEXT:  ## BB#55: ## %cond.load79
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $11, 27(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_56: ## %else80
; AVX512F-NEXT:    vpextrb $12, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_58
; AVX512F-NEXT:  ## BB#57: ## %cond.load82
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $12, 28(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_58: ## %else83
; AVX512F-NEXT:    vpextrb $13, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_60
; AVX512F-NEXT:  ## BB#59: ## %cond.load85
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $13, 29(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_60: ## %else86
; AVX512F-NEXT:    vpextrb $14, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_62
; AVX512F-NEXT:  ## BB#61: ## %cond.load88
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $14, 30(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_62: ## %else89
; AVX512F-NEXT:    vpextrb $15, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_64
; AVX512F-NEXT:  ## BB#63: ## %cond.load91
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $15, 31(%rdi), %xmm2, %xmm2
 | 
						|
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 | 
						|
; AVX512F-NEXT:  LBB51_64: ## %else92
 | 
						|
; AVX512F-NEXT:    vpsllw $7, %ymm0, %ymm0
 | 
						|
; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 | 
						|
; AVX512F-NEXT:    vpxor %ymm2, %ymm2, %ymm2
 | 
						|
; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
 | 
						|
; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 | 
						|
; AVX512F-NEXT:    retq
 | 
						|
;
 | 
						|
; SKX-LABEL: test_mask_load_32xi8:
 | 
						|
; SKX:       ## BB#0:
 | 
						|
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
 | 
						|
; SKX-NEXT:    vpmovb2m %ymm0, %k1
 | 
						|
; SKX-NEXT:    vmovdqu8 (%rdi), %ymm0 {%k1} {z}
 | 
						|
; SKX-NEXT:    retq
 | 
						|
  %res = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
 | 
						|
  ret <32 x i8> %res
 | 
						|
}
 | 
						|
declare <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)
 | 
						|
 | 
						|
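
; The SKX lowering above is the ideal form of this intrinsic: vpsllw $7 moves
; bit 0 of each mask byte into the sign bit, vpmovb2m collapses the sign bits
; into a k-register, and the load folds into a single zero-masking vmovdqu8.
; Without avx512bw the mask has to stay in a ymm register, so the AVX512F
; path scalarizes: one vpextrb/testb/je per lane, then a final
; sign-extend-and-vpand to zero the inactive lanes (the passthru here is
; zeroinitializer). For reference, the call being lowered is:
;
;   %res = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %addr,
;            i32 4, <32 x i1> %mask, <32 x i8> zeroinitializer)
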
define <64 x i8> @test_mask_load_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
 | 
						|
; AVX1-LABEL: test_mask_load_64xi8:
 | 
						|
; AVX1:       ## BB#0:
 | 
						|
; AVX1-NEXT:    pushq %rbp
 | 
						|
; AVX1-NEXT:  Ltmp3:
 | 
						|
; AVX1-NEXT:    .cfi_def_cfa_offset 16
 | 
						|
; AVX1-NEXT:    pushq %r15
 | 
						|
; AVX1-NEXT:  Ltmp4:
 | 
						|
; AVX1-NEXT:    .cfi_def_cfa_offset 24
 | 
						|
; AVX1-NEXT:    pushq %r14
 | 
						|
; AVX1-NEXT:  Ltmp5:
 | 
						|
; AVX1-NEXT:    .cfi_def_cfa_offset 32
 | 
						|
; AVX1-NEXT:    pushq %r13
 | 
						|
; AVX1-NEXT:  Ltmp6:
 | 
						|
; AVX1-NEXT:    .cfi_def_cfa_offset 40
 | 
						|
; AVX1-NEXT:    pushq %r12
 | 
						|
; AVX1-NEXT:  Ltmp7:
 | 
						|
; AVX1-NEXT:    .cfi_def_cfa_offset 48
 | 
						|
; AVX1-NEXT:    pushq %rbx
 | 
						|
; AVX1-NEXT:  Ltmp8:
 | 
						|
; AVX1-NEXT:    .cfi_def_cfa_offset 56
 | 
						|
; AVX1-NEXT:    pushq %rax
 | 
						|
; AVX1-NEXT:  Ltmp9:
 | 
						|
; AVX1-NEXT:    .cfi_def_cfa_offset 64
 | 
						|
; AVX1-NEXT:  Ltmp10:
 | 
						|
; AVX1-NEXT:    .cfi_offset %rbx, -56
 | 
						|
; AVX1-NEXT:  Ltmp11:
 | 
						|
; AVX1-NEXT:    .cfi_offset %r12, -48
 | 
						|
; AVX1-NEXT:  Ltmp12:
 | 
						|
; AVX1-NEXT:    .cfi_offset %r13, -40
 | 
						|
; AVX1-NEXT:  Ltmp13:
 | 
						|
; AVX1-NEXT:    .cfi_offset %r14, -32
 | 
						|
; AVX1-NEXT:  Ltmp14:
 | 
						|
; AVX1-NEXT:    .cfi_offset %r15, -24
 | 
						|
; AVX1-NEXT:  Ltmp15:
 | 
						|
; AVX1-NEXT:    .cfi_offset %rbp, -16
 | 
						|
; AVX1-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 | 
						|
; AVX1-NEXT:    movl %edi, %r13d
 | 
						|
; AVX1-NEXT:    testb $1, %dil
 | 
						|
; AVX1-NEXT:    je LBB52_2
 | 
						|
; AVX1-NEXT:  ## BB#1: ## %cond.load
 | 
						|
; AVX1-NEXT:    movzbl (%rax), %ebp
 | 
						|
; AVX1-NEXT:    vmovd %ebp, %xmm9
 | 
						|
; AVX1-NEXT:  LBB52_2: ## %else
 | 
						|
; AVX1-NEXT:    testb $1, %sil
 | 
						|
; AVX1-NEXT:    je LBB52_4
 | 
						|
; AVX1-NEXT:  ## BB#3: ## %cond.load1
 | 
						|
; AVX1-NEXT:    vpinsrb $1, 1(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_4: ## %else2
 | 
						|
; AVX1-NEXT:    testb $1, %dl
 | 
						|
; AVX1-NEXT:    je LBB52_6
 | 
						|
; AVX1-NEXT:  ## BB#5: ## %cond.load4
 | 
						|
; AVX1-NEXT:    vpinsrb $2, 2(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_6: ## %else5
 | 
						|
; AVX1-NEXT:    testb $1, %cl
 | 
						|
; AVX1-NEXT:    je LBB52_8
 | 
						|
; AVX1-NEXT:  ## BB#7: ## %cond.load7
 | 
						|
; AVX1-NEXT:    vpinsrb $3, 3(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_8: ## %else8
 | 
						|
; AVX1-NEXT:    testb $1, %r8b
 | 
						|
; AVX1-NEXT:    je LBB52_10
 | 
						|
; AVX1-NEXT:  ## BB#9: ## %cond.load10
 | 
						|
; AVX1-NEXT:    vpinsrb $4, 4(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_10: ## %else11
 | 
						|
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
 | 
						|
; AVX1-NEXT:    testb $1, %r9b
 | 
						|
; AVX1-NEXT:    je LBB52_12
 | 
						|
; AVX1-NEXT:  ## BB#11: ## %cond.load13
 | 
						|
; AVX1-NEXT:    vpinsrb $5, 5(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_12: ## %else14
 | 
						|
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 | 
						|
; AVX1-NEXT:    testb $1, %r10b
 | 
						|
; AVX1-NEXT:    je LBB52_14
 | 
						|
; AVX1-NEXT:  ## BB#13: ## %cond.load16
 | 
						|
; AVX1-NEXT:    vpinsrb $6, 6(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_14: ## %else17
 | 
						|
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
 | 
						|
; AVX1-NEXT:    testb $1, %r11b
 | 
						|
; AVX1-NEXT:    je LBB52_16
 | 
						|
; AVX1-NEXT:  ## BB#15: ## %cond.load19
 | 
						|
; AVX1-NEXT:    vpinsrb $7, 7(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_16: ## %else20
 | 
						|
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
 | 
						|
; AVX1-NEXT:    testb $1, %r14b
 | 
						|
; AVX1-NEXT:    je LBB52_18
 | 
						|
; AVX1-NEXT:  ## BB#17: ## %cond.load22
 | 
						|
; AVX1-NEXT:    vpinsrb $8, 8(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_18: ## %else23
 | 
						|
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
 | 
						|
; AVX1-NEXT:    testb $1, %r15b
 | 
						|
; AVX1-NEXT:    je LBB52_20
 | 
						|
; AVX1-NEXT:  ## BB#19: ## %cond.load25
 | 
						|
; AVX1-NEXT:    vpinsrb $9, 9(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_20: ## %else26
 | 
						|
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 | 
						|
; AVX1-NEXT:    testb $1, %r12b
 | 
						|
; AVX1-NEXT:    je LBB52_22
 | 
						|
; AVX1-NEXT:  ## BB#21: ## %cond.load28
 | 
						|
; AVX1-NEXT:    vpinsrb $10, 10(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_22: ## %else29
 | 
						|
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
 | 
						|
; AVX1-NEXT:    testb $1, %dil
 | 
						|
; AVX1-NEXT:    je LBB52_24
 | 
						|
; AVX1-NEXT:  ## BB#23: ## %cond.load31
 | 
						|
; AVX1-NEXT:    vpinsrb $11, 11(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_24: ## %else32
 | 
						|
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 | 
						|
; AVX1-NEXT:    testb $1, %bpl
 | 
						|
; AVX1-NEXT:    je LBB52_26
 | 
						|
; AVX1-NEXT:  ## BB#25: ## %cond.load34
 | 
						|
; AVX1-NEXT:    vpinsrb $12, 12(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_26: ## %else35
 | 
						|
; AVX1-NEXT:    testb $1, %bl
 | 
						|
; AVX1-NEXT:    je LBB52_28
 | 
						|
; AVX1-NEXT:  ## BB#27: ## %cond.load37
 | 
						|
; AVX1-NEXT:    vpinsrb $13, 13(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_28: ## %else38
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_30
 | 
						|
; AVX1-NEXT:  ## BB#29: ## %cond.load40
 | 
						|
; AVX1-NEXT:    vpinsrb $14, 14(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_30: ## %else41
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_32
 | 
						|
; AVX1-NEXT:  ## BB#31: ## %cond.load43
 | 
						|
; AVX1-NEXT:    vpinsrb $15, 15(%rax), %xmm9, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_32: ## %else44
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_34
 | 
						|
; AVX1-NEXT:  ## BB#33: ## %cond.load46
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $0, 16(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_34: ## %else47
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_36
 | 
						|
; AVX1-NEXT:  ## BB#35: ## %cond.load49
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $1, 17(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_36: ## %else50
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_38
 | 
						|
; AVX1-NEXT:  ## BB#37: ## %cond.load52
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $2, 18(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_38: ## %else53
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_40
 | 
						|
; AVX1-NEXT:  ## BB#39: ## %cond.load55
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $3, 19(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_40: ## %else56
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_42
 | 
						|
; AVX1-NEXT:  ## BB#41: ## %cond.load58
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $4, 20(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_42: ## %else59
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_44
 | 
						|
; AVX1-NEXT:  ## BB#43: ## %cond.load61
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $5, 21(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_44: ## %else62
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_46
 | 
						|
; AVX1-NEXT:  ## BB#45: ## %cond.load64
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $6, 22(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_46: ## %else65
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_48
 | 
						|
; AVX1-NEXT:  ## BB#47: ## %cond.load67
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $7, 23(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_48: ## %else68
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_50
 | 
						|
; AVX1-NEXT:  ## BB#49: ## %cond.load70
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $8, 24(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_50: ## %else71
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_52
 | 
						|
; AVX1-NEXT:  ## BB#51: ## %cond.load73
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $9, 25(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_52: ## %else74
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_54
 | 
						|
; AVX1-NEXT:  ## BB#53: ## %cond.load76
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $10, 26(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_54: ## %else77
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_56
 | 
						|
; AVX1-NEXT:  ## BB#55: ## %cond.load79
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $11, 27(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_56: ## %else80
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_58
 | 
						|
; AVX1-NEXT:  ## BB#57: ## %cond.load82
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $12, 28(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_58: ## %else83
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_60
 | 
						|
; AVX1-NEXT:  ## BB#59: ## %cond.load85
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $13, 29(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_60: ## %else86
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_62
 | 
						|
; AVX1-NEXT:  ## BB#61: ## %cond.load88
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $14, 30(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_62: ## %else89
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_64
 | 
						|
; AVX1-NEXT:  ## BB#63: ## %cond.load91
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
 | 
						|
; AVX1-NEXT:    vpinsrb $15, 31(%rax), %xmm3, %xmm3
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
 | 
						|
; AVX1-NEXT:  LBB52_64: ## %else92
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_66
 | 
						|
; AVX1-NEXT:  ## BB#65: ## %cond.load94
 | 
						|
; AVX1-NEXT:    vpinsrb $0, 32(%rax), %xmm0, %xmm3
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm0[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_66: ## %else95
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_68
 | 
						|
; AVX1-NEXT:  ## BB#67: ## %cond.load97
 | 
						|
; AVX1-NEXT:    vpinsrb $1, 33(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_68: ## %else98
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_70
 | 
						|
; AVX1-NEXT:  ## BB#69: ## %cond.load100
 | 
						|
; AVX1-NEXT:    vpinsrb $2, 34(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_70: ## %else101
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_72
 | 
						|
; AVX1-NEXT:  ## BB#71: ## %cond.load103
 | 
						|
; AVX1-NEXT:    vpinsrb $3, 35(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_72: ## %else104
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_74
 | 
						|
; AVX1-NEXT:  ## BB#73: ## %cond.load106
 | 
						|
; AVX1-NEXT:    vpinsrb $4, 36(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_74: ## %else107
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_76
 | 
						|
; AVX1-NEXT:  ## BB#75: ## %cond.load109
 | 
						|
; AVX1-NEXT:    vpinsrb $5, 37(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_76: ## %else110
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_78
 | 
						|
; AVX1-NEXT:  ## BB#77: ## %cond.load112
 | 
						|
; AVX1-NEXT:    vpinsrb $6, 38(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_78: ## %else113
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_80
 | 
						|
; AVX1-NEXT:  ## BB#79: ## %cond.load115
 | 
						|
; AVX1-NEXT:    vpinsrb $7, 39(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_80: ## %else116
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_82
 | 
						|
; AVX1-NEXT:  ## BB#81: ## %cond.load118
 | 
						|
; AVX1-NEXT:    vpinsrb $8, 40(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_82: ## %else119
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_84
 | 
						|
; AVX1-NEXT:  ## BB#83: ## %cond.load121
 | 
						|
; AVX1-NEXT:    vpinsrb $9, 41(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_84: ## %else122
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_86
 | 
						|
; AVX1-NEXT:  ## BB#85: ## %cond.load124
 | 
						|
; AVX1-NEXT:    vpinsrb $10, 42(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_86: ## %else125
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_88
 | 
						|
; AVX1-NEXT:  ## BB#87: ## %cond.load127
 | 
						|
; AVX1-NEXT:    vpinsrb $11, 43(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_88: ## %else128
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_90
 | 
						|
; AVX1-NEXT:  ## BB#89: ## %cond.load130
 | 
						|
; AVX1-NEXT:    vpinsrb $12, 44(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_90: ## %else131
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_92
 | 
						|
; AVX1-NEXT:  ## BB#91: ## %cond.load133
 | 
						|
; AVX1-NEXT:    vpinsrb $13, 45(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_92: ## %else134
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_94
 | 
						|
; AVX1-NEXT:  ## BB#93: ## %cond.load136
 | 
						|
; AVX1-NEXT:    vpinsrb $14, 46(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_94: ## %else137
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_96
 | 
						|
; AVX1-NEXT:  ## BB#95: ## %cond.load139
 | 
						|
; AVX1-NEXT:    vpinsrb $15, 47(%rax), %xmm3, %xmm4
 | 
						|
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
 | 
						|
; AVX1-NEXT:  LBB52_96: ## %else140
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_98
 | 
						|
; AVX1-NEXT:  ## BB#97: ## %cond.load142
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $0, 48(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_98: ## %else143
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_100
 | 
						|
; AVX1-NEXT:  ## BB#99: ## %cond.load145
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $1, 49(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_100: ## %else146
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_102
 | 
						|
; AVX1-NEXT:  ## BB#101: ## %cond.load148
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $2, 50(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_102: ## %else149
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_104
 | 
						|
; AVX1-NEXT:  ## BB#103: ## %cond.load151
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $3, 51(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_104: ## %else152
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_106
 | 
						|
; AVX1-NEXT:  ## BB#105: ## %cond.load154
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $4, 52(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_106: ## %else155
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_108
 | 
						|
; AVX1-NEXT:  ## BB#107: ## %cond.load157
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $5, 53(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_108: ## %else158
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_110
 | 
						|
; AVX1-NEXT:  ## BB#109: ## %cond.load160
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $6, 54(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_110: ## %else161
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_112
 | 
						|
; AVX1-NEXT:  ## BB#111: ## %cond.load163
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $7, 55(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_112: ## %else164
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_114
 | 
						|
; AVX1-NEXT:  ## BB#113: ## %cond.load166
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $8, 56(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_114: ## %else167
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_116
 | 
						|
; AVX1-NEXT:  ## BB#115: ## %cond.load169
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $9, 57(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_116: ## %else170
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_118
 | 
						|
; AVX1-NEXT:  ## BB#117: ## %cond.load172
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $10, 58(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_118: ## %else173
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_120
 | 
						|
; AVX1-NEXT:  ## BB#119: ## %cond.load175
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $11, 59(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_120: ## %else176
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_122
 | 
						|
; AVX1-NEXT:  ## BB#121: ## %cond.load178
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $12, 60(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_122: ## %else179
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_124
 | 
						|
; AVX1-NEXT:  ## BB#123: ## %cond.load181
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $13, 61(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_124: ## %else182
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    je LBB52_126
 | 
						|
; AVX1-NEXT:  ## BB#125: ## %cond.load184
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $14, 62(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_126: ## %else185
 | 
						|
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX1-NEXT:    movl %r9d, {{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movl %r8d, (%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    je LBB52_128
 | 
						|
; AVX1-NEXT:  ## BB#127: ## %cond.load187
 | 
						|
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $15, 63(%rax), %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 | 
						|
; AVX1-NEXT:  LBB52_128: ## %else188
 | 
						|
; AVX1-NEXT:    movzbl %r10b, %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl %r11b, %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl %r14b, %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl %r15b, %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl %r12b, %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl %dil, %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl %bpl, %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl %bl, %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r9d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
 | 
						|
; AVX1-NEXT:    movzbl %r13b, %r13d
 | 
						|
; AVX1-NEXT:    vmovd %r13d, %xmm4
 | 
						|
; AVX1-NEXT:    movl -{{[0-9]+}}(%rsp), %edi ## 4-byte Reload
 | 
						|
; AVX1-NEXT:    movzbl %dil, %ebp
 | 
						|
; AVX1-NEXT:    vpinsrb $1, %ebp, %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
 | 
						|
; AVX1-NEXT:    movzbl %bpl, %ebp
 | 
						|
; AVX1-NEXT:    vpinsrb $2, %ebp, %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
 | 
						|
; AVX1-NEXT:    movzbl %bpl, %ebp
 | 
						|
; AVX1-NEXT:    vpinsrb $3, %ebp, %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    movl (%rsp), %ebp ## 4-byte Reload
 | 
						|
; AVX1-NEXT:    movzbl %bpl, %ebp
 | 
						|
; AVX1-NEXT:    vpinsrb $4, %ebp, %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
 | 
						|
; AVX1-NEXT:    movzbl %bpl, %ebp
 | 
						|
; AVX1-NEXT:    vpinsrb $5, %ebp, %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vmovd -{{[0-9]+}}(%rsp), %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    ## xmm5 = mem[0],zero,zero,zero
 | 
						|
; AVX1-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm5, %xmm8 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vmovd -{{[0-9]+}}(%rsp), %xmm6 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    ## xmm6 = mem[0],zero,zero,zero
 | 
						|
; AVX1-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
 | 
						|
; AVX1-NEXT:    vpinsrb $2, %r12d, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    vpinsrb $3, %r15d, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    vpinsrb $4, %r14d, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    vpinsrb $5, %r11d, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    vpinsrb $6, %r8d, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    vpinsrb $7, %edx, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    vpinsrb $8, %eax, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
 | 
						|
; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
 | 
						|
; AVX1-NEXT:    vpinsrb $10, %esi, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
 | 
						|
; AVX1-NEXT:    vpinsrb $11, %r9d, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
 | 
						|
; AVX1-NEXT:    vpinsrb $12, %r10d, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r9d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
 | 
						|
; AVX1-NEXT:    vpinsrb $13, %ebx, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 | 
						|
; AVX1-NEXT:    vpinsrb $14, %r13d, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
 | 
						|
; AVX1-NEXT:    vpinsrb $15, %r14d, %xmm6, %xmm10
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
 | 
						|
; AVX1-NEXT:    vmovd %edi, %xmm7
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
 | 
						|
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
 | 
						|
; AVX1-NEXT:    vpinsrb $1, %r11d, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $2, %r15d, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $3, %r12d, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $4, %r8d, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $5, %ecx, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $6, %r9d, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $7, %esi, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $8, %r10d, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $9, %eax, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $10, %r13d, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $11, %edx, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $12, %r14d, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $13, %ebx, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $14, %edi, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpinsrb $15, %ebp, %xmm7, %xmm7
 | 
						|
; AVX1-NEXT:    vpsllw $7, %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
 | 
						|
; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 | 
						|
; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm2, %xmm4
 | 
						|
; AVX1-NEXT:    vpsllw $7, %xmm8, %xmm6
 | 
						|
; AVX1-NEXT:    vpand %xmm5, %xmm6, %xmm6
 | 
						|
; AVX1-NEXT:    vpcmpgtb %xmm6, %xmm2, %xmm6
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
 | 
						|
; AVX1-NEXT:    vandnps %ymm0, %ymm4, %ymm0
 | 
						|
; AVX1-NEXT:    vandps %ymm4, %ymm9, %ymm4
 | 
						|
; AVX1-NEXT:    vorps %ymm0, %ymm4, %ymm0
 | 
						|
; AVX1-NEXT:    vpsllw $7, %xmm10, %xmm4
 | 
						|
; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
 | 
						|
; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm2, %xmm4
 | 
						|
; AVX1-NEXT:    vpsllw $7, %xmm7, %xmm6
 | 
						|
; AVX1-NEXT:    vpand %xmm5, %xmm6, %xmm5
 | 
						|
; AVX1-NEXT:    vpcmpgtb %xmm5, %xmm2, %xmm2
 | 
						|
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm4, %ymm2
 | 
						|
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
 | 
						|
; AVX1-NEXT:    vandps %ymm2, %ymm3, %ymm2
 | 
						|
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
 | 
						|
; AVX1-NEXT:    addq $8, %rsp
 | 
						|
; AVX1-NEXT:    popq %rbx
 | 
						|
; AVX1-NEXT:    popq %r12
 | 
						|
; AVX1-NEXT:    popq %r13
 | 
						|
; AVX1-NEXT:    popq %r14
 | 
						|
; AVX1-NEXT:    popq %r15
 | 
						|
; AVX1-NEXT:    popq %rbp
 | 
						|
; AVX1-NEXT:    retq
 | 
						|
;
 | 
						|
; AVX2-LABEL: test_mask_load_64xi8:
 | 
						|
; AVX2:       ## BB#0:
 | 
						|
; AVX2-NEXT:    pushq %rbp
 | 
						|
; AVX2-NEXT:  Ltmp3:
 | 
						|
; AVX2-NEXT:    .cfi_def_cfa_offset 16
 | 
						|
; AVX2-NEXT:    pushq %r15
 | 
						|
; AVX2-NEXT:  Ltmp4:
 | 
						|
; AVX2-NEXT:    .cfi_def_cfa_offset 24
 | 
						|
; AVX2-NEXT:    pushq %r14
 | 
						|
; AVX2-NEXT:  Ltmp5:
 | 
						|
; AVX2-NEXT:    .cfi_def_cfa_offset 32
 | 
						|
; AVX2-NEXT:    pushq %r13
 | 
						|
; AVX2-NEXT:  Ltmp6:
 | 
						|
; AVX2-NEXT:    .cfi_def_cfa_offset 40
 | 
						|
; AVX2-NEXT:    pushq %r12
 | 
						|
; AVX2-NEXT:  Ltmp7:
 | 
						|
; AVX2-NEXT:    .cfi_def_cfa_offset 48
 | 
						|
; AVX2-NEXT:    pushq %rbx
 | 
						|
; AVX2-NEXT:  Ltmp8:
 | 
						|
; AVX2-NEXT:    .cfi_def_cfa_offset 56
 | 
						|
; AVX2-NEXT:    pushq %rax
 | 
						|
; AVX2-NEXT:  Ltmp9:
 | 
						|
; AVX2-NEXT:    .cfi_def_cfa_offset 64
 | 
						|
; AVX2-NEXT:  Ltmp10:
 | 
						|
; AVX2-NEXT:    .cfi_offset %rbx, -56
 | 
						|
; AVX2-NEXT:  Ltmp11:
 | 
						|
; AVX2-NEXT:    .cfi_offset %r12, -48
 | 
						|
; AVX2-NEXT:  Ltmp12:
 | 
						|
; AVX2-NEXT:    .cfi_offset %r13, -40
 | 
						|
; AVX2-NEXT:  Ltmp13:
 | 
						|
; AVX2-NEXT:    .cfi_offset %r14, -32
 | 
						|
; AVX2-NEXT:  Ltmp14:
 | 
						|
; AVX2-NEXT:    .cfi_offset %r15, -24
 | 
						|
; AVX2-NEXT:  Ltmp15:
 | 
						|
; AVX2-NEXT:    .cfi_offset %rbp, -16
 | 
						|
; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 | 
						|
; AVX2-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
 | 
						|
; AVX2-NEXT:    testb $1, %dil
 | 
						|
; AVX2-NEXT:    je LBB52_2
 | 
						|
; AVX2-NEXT:  ## BB#1: ## %cond.load
 | 
						|
; AVX2-NEXT:    movzbl (%rax), %ebp
 | 
						|
; AVX2-NEXT:    vmovd %ebp, %xmm2
 | 
						|
; AVX2-NEXT:  LBB52_2: ## %else
 | 
						|
; AVX2-NEXT:    testb $1, %sil
 | 
						|
; AVX2-NEXT:    je LBB52_4
 | 
						|
; AVX2-NEXT:  ## BB#3: ## %cond.load1
 | 
						|
; AVX2-NEXT:    vpinsrb $1, 1(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_4: ## %else2
 | 
						|
; AVX2-NEXT:    testb $1, %dl
 | 
						|
; AVX2-NEXT:    je LBB52_6
 | 
						|
; AVX2-NEXT:  ## BB#5: ## %cond.load4
 | 
						|
; AVX2-NEXT:    vpinsrb $2, 2(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_6: ## %else5
 | 
						|
; AVX2-NEXT:    testb $1, %cl
 | 
						|
; AVX2-NEXT:    je LBB52_8
 | 
						|
; AVX2-NEXT:  ## BB#7: ## %cond.load7
 | 
						|
; AVX2-NEXT:    vpinsrb $3, 3(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_8: ## %else8
 | 
						|
; AVX2-NEXT:    testb $1, %r8b
 | 
						|
; AVX2-NEXT:    je LBB52_10
 | 
						|
; AVX2-NEXT:  ## BB#9: ## %cond.load10
 | 
						|
; AVX2-NEXT:    vpinsrb $4, 4(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_10: ## %else11
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
 | 
						|
; AVX2-NEXT:    testb $1, %r9b
 | 
						|
; AVX2-NEXT:    je LBB52_12
 | 
						|
; AVX2-NEXT:  ## BB#11: ## %cond.load13
 | 
						|
; AVX2-NEXT:    vpinsrb $5, 5(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_12: ## %else14
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 | 
						|
; AVX2-NEXT:    testb $1, %r10b
 | 
						|
; AVX2-NEXT:    je LBB52_14
 | 
						|
; AVX2-NEXT:  ## BB#13: ## %cond.load16
 | 
						|
; AVX2-NEXT:    vpinsrb $6, 6(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_14: ## %else17
 | 
						|
; AVX2-NEXT:    testb $1, %r11b
 | 
						|
; AVX2-NEXT:    je LBB52_16
 | 
						|
; AVX2-NEXT:  ## BB#15: ## %cond.load19
 | 
						|
; AVX2-NEXT:    vpinsrb $7, 7(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_16: ## %else20
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_18
 | 
						|
; AVX2-NEXT:  ## BB#17: ## %cond.load22
 | 
						|
; AVX2-NEXT:    vpinsrb $8, 8(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_18: ## %else23
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_20
 | 
						|
; AVX2-NEXT:  ## BB#19: ## %cond.load25
 | 
						|
; AVX2-NEXT:    vpinsrb $9, 9(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_20: ## %else26
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_22
 | 
						|
; AVX2-NEXT:  ## BB#21: ## %cond.load28
 | 
						|
; AVX2-NEXT:    vpinsrb $10, 10(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_22: ## %else29
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_24
 | 
						|
; AVX2-NEXT:  ## BB#23: ## %cond.load31
 | 
						|
; AVX2-NEXT:    vpinsrb $11, 11(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_24: ## %else32
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 | 
						|
; AVX2-NEXT:    testb $1, %bpl
 | 
						|
; AVX2-NEXT:    je LBB52_26
 | 
						|
; AVX2-NEXT:  ## BB#25: ## %cond.load34
 | 
						|
; AVX2-NEXT:    vpinsrb $12, 12(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_26: ## %else35
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
 | 
						|
; AVX2-NEXT:    testb $1, %bl
 | 
						|
; AVX2-NEXT:    je LBB52_28
 | 
						|
; AVX2-NEXT:  ## BB#27: ## %cond.load37
 | 
						|
; AVX2-NEXT:    vpinsrb $13, 13(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_28: ## %else38
 | 
						|
; AVX2-NEXT:    testb $1, %r14b
 | 
						|
; AVX2-NEXT:    je LBB52_30
 | 
						|
; AVX2-NEXT:  ## BB#29: ## %cond.load40
 | 
						|
; AVX2-NEXT:    vpinsrb $14, 14(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_30: ## %else41
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_32
 | 
						|
; AVX2-NEXT:  ## BB#31: ## %cond.load43
 | 
						|
; AVX2-NEXT:    vpinsrb $15, 15(%rax), %xmm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
 | 
						|
; AVX2-NEXT:  LBB52_32: ## %else44
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
 | 
						|
; AVX2-NEXT:    testb $1, %r13b
 | 
						|
; AVX2-NEXT:    je LBB52_34
 | 
						|
; AVX2-NEXT:  ## BB#33: ## %cond.load46
 | 
						|
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpinsrb $0, 16(%rax), %xmm3, %xmm3
 | 
						|
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 | 
						|
; AVX2-NEXT:  LBB52_34: ## %else47
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
 | 
						|
; AVX2-NEXT:    testb $1, %r12b
 | 
						|
; AVX2-NEXT:    je LBB52_36
 | 
						|
; AVX2-NEXT:  ## BB#35: ## %cond.load49
 | 
						|
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpinsrb $1, 17(%rax), %xmm3, %xmm3
 | 
						|
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 | 
						|
; AVX2-NEXT:  LBB52_36: ## %else50
 | 
						|
; AVX2-NEXT:    testb $1, %r15b
 | 
						|
; AVX2-NEXT:    je LBB52_38
 | 
						|
; AVX2-NEXT:  ## BB#37: ## %cond.load52
 | 
						|
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpinsrb $2, 18(%rax), %xmm3, %xmm3
 | 
						|
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 | 
						|
; AVX2-NEXT:  LBB52_38: ## %else53
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_40
 | 
						|
; AVX2-NEXT:  ## BB#39: ## %cond.load55
 | 
						|
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpinsrb $3, 19(%rax), %xmm3, %xmm3
 | 
						|
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 | 
						|
; AVX2-NEXT:  LBB52_40: ## %else56
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_42
 | 
						|
; AVX2-NEXT:  ## BB#41: ## %cond.load58
 | 
						|
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpinsrb $4, 20(%rax), %xmm3, %xmm3
 | 
						|
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 | 
						|
; AVX2-NEXT:  LBB52_42: ## %else59
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_44
 | 
						|
; AVX2-NEXT:  ## BB#43: ## %cond.load61
 | 
						|
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpinsrb $5, 21(%rax), %xmm3, %xmm3
 | 
						|
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 | 
						|
; AVX2-NEXT:  LBB52_44: ## %else62
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_46
 | 
						|
; AVX2-NEXT:  ## BB#45: ## %cond.load64
 | 
						|
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpinsrb $6, 22(%rax), %xmm3, %xmm3
 | 
						|
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 | 
						|
; AVX2-NEXT:  LBB52_46: ## %else65
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_48
 | 
						|
; AVX2-NEXT:  ## BB#47: ## %cond.load67
 | 
						|
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpinsrb $7, 23(%rax), %xmm3, %xmm3
 | 
						|
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 | 
						|
; AVX2-NEXT:  LBB52_48: ## %else68
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_50
 | 
						|
; AVX2-NEXT:  ## BB#49: ## %cond.load70
 | 
						|
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpinsrb $8, 24(%rax), %xmm3, %xmm3
 | 
						|
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 | 
						|
; AVX2-NEXT:  LBB52_50: ## %else71
 | 
						|
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 | 
						|
; AVX2-NEXT:    je LBB52_52
 | 
						|
; AVX2-NEXT:  ## BB#51: ## %cond.load73
 | 
						|
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 | 
						|
; AVX2-NEXT:    vpinsrb $9, 25(%rax), %xmm3, %xmm3
 | 
						|
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT:  LBB52_52: ## %else74
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_54
; AVX2-NEXT:  ## BB#53: ## %cond.load76
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpinsrb $10, 26(%rax), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT:  LBB52_54: ## %else77
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_56
; AVX2-NEXT:  ## BB#55: ## %cond.load79
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpinsrb $11, 27(%rax), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT:  LBB52_56: ## %else80
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_58
; AVX2-NEXT:  ## BB#57: ## %cond.load82
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpinsrb $12, 28(%rax), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT:  LBB52_58: ## %else83
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_60
; AVX2-NEXT:  ## BB#59: ## %cond.load85
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpinsrb $13, 29(%rax), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT:  LBB52_60: ## %else86
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_62
; AVX2-NEXT:  ## BB#61: ## %cond.load88
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpinsrb $14, 30(%rax), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT:  LBB52_62: ## %else89
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_64
; AVX2-NEXT:  ## BB#63: ## %cond.load91
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpinsrb $15, 31(%rax), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT:  LBB52_64: ## %else92
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_66
; AVX2-NEXT:  ## BB#65: ## %cond.load94
; AVX2-NEXT:    vpinsrb $0, 32(%rax), %xmm0, %xmm3
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:  LBB52_66: ## %else95
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_68
; AVX2-NEXT:  ## BB#67: ## %cond.load97
; AVX2-NEXT:    vpinsrb $1, 33(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_68: ## %else98
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_70
; AVX2-NEXT:  ## BB#69: ## %cond.load100
; AVX2-NEXT:    vpinsrb $2, 34(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_70: ## %else101
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_72
; AVX2-NEXT:  ## BB#71: ## %cond.load103
; AVX2-NEXT:    vpinsrb $3, 35(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_72: ## %else104
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_74
; AVX2-NEXT:  ## BB#73: ## %cond.load106
; AVX2-NEXT:    vpinsrb $4, 36(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_74: ## %else107
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_76
; AVX2-NEXT:  ## BB#75: ## %cond.load109
; AVX2-NEXT:    vpinsrb $5, 37(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_76: ## %else110
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_78
; AVX2-NEXT:  ## BB#77: ## %cond.load112
; AVX2-NEXT:    vpinsrb $6, 38(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_78: ## %else113
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_80
; AVX2-NEXT:  ## BB#79: ## %cond.load115
; AVX2-NEXT:    vpinsrb $7, 39(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_80: ## %else116
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_82
; AVX2-NEXT:  ## BB#81: ## %cond.load118
; AVX2-NEXT:    vpinsrb $8, 40(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_82: ## %else119
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_84
; AVX2-NEXT:  ## BB#83: ## %cond.load121
; AVX2-NEXT:    vpinsrb $9, 41(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_84: ## %else122
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_86
; AVX2-NEXT:  ## BB#85: ## %cond.load124
; AVX2-NEXT:    vpinsrb $10, 42(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_86: ## %else125
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_88
; AVX2-NEXT:  ## BB#87: ## %cond.load127
; AVX2-NEXT:    vpinsrb $11, 43(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_88: ## %else128
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_90
; AVX2-NEXT:  ## BB#89: ## %cond.load130
; AVX2-NEXT:    vpinsrb $12, 44(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_90: ## %else131
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_92
; AVX2-NEXT:  ## BB#91: ## %cond.load133
; AVX2-NEXT:    vpinsrb $13, 45(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_92: ## %else134
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_94
; AVX2-NEXT:  ## BB#93: ## %cond.load136
; AVX2-NEXT:    vpinsrb $14, 46(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_94: ## %else137
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_96
; AVX2-NEXT:  ## BB#95: ## %cond.load139
; AVX2-NEXT:    vpinsrb $15, 47(%rax), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB52_96: ## %else140
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_98
; AVX2-NEXT:  ## BB#97: ## %cond.load142
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $0, 48(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_98: ## %else143
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_100
; AVX2-NEXT:  ## BB#99: ## %cond.load145
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $1, 49(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_100: ## %else146
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_102
; AVX2-NEXT:  ## BB#101: ## %cond.load148
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $2, 50(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_102: ## %else149
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_104
; AVX2-NEXT:  ## BB#103: ## %cond.load151
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $3, 51(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_104: ## %else152
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_106
; AVX2-NEXT:  ## BB#105: ## %cond.load154
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $4, 52(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_106: ## %else155
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_108
; AVX2-NEXT:  ## BB#107: ## %cond.load157
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $5, 53(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_108: ## %else158
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_110
; AVX2-NEXT:  ## BB#109: ## %cond.load160
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $6, 54(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_110: ## %else161
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_112
; AVX2-NEXT:  ## BB#111: ## %cond.load163
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $7, 55(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_112: ## %else164
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_114
; AVX2-NEXT:  ## BB#113: ## %cond.load166
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $8, 56(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_114: ## %else167
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_116
; AVX2-NEXT:  ## BB#115: ## %cond.load169
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $9, 57(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_116: ## %else170
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_118
; AVX2-NEXT:  ## BB#117: ## %cond.load172
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $10, 58(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_118: ## %else173
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_120
; AVX2-NEXT:  ## BB#119: ## %cond.load175
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $11, 59(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_120: ## %else176
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_122
; AVX2-NEXT:  ## BB#121: ## %cond.load178
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $12, 60(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_122: ## %else179
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    je LBB52_124
; AVX2-NEXT:  ## BB#123: ## %cond.load181
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $13, 61(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_124: ## %else182
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    jne LBB52_126
; AVX2-NEXT:  ## BB#125:
; AVX2-NEXT:    movq %rax, %rdi
; AVX2-NEXT:    jmp LBB52_127
; AVX2-NEXT:  LBB52_126: ## %cond.load184
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    movq %rax, %rdi
; AVX2-NEXT:    vpinsrb $14, 62(%rax), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_127: ## %else185
; AVX2-NEXT:    movl %ebp, %eax
; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT:    movl %r9d, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movl %r8d, (%rsp) ## 4-byte Spill
; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movl %esi, %ebp
; AVX2-NEXT:    je LBB52_129
; AVX2-NEXT:  ## BB#128: ## %cond.load187
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrb $15, 63(%rdi), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB52_129: ## %else188
; AVX2-NEXT:    movzbl %r10b, %ecx
; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl %r11b, %ecx
; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl %al, %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl %bl, %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl %r14b, %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl %r12b, %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl %r13b, %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl %r15b, %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r9d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; AVX2-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT:    movl -{{[0-9]+}}(%rsp), %edi ## 4-byte Reload
; AVX2-NEXT:    movzbl %dil, %r13d
; AVX2-NEXT:    vmovd %r13d, %xmm4
; AVX2-NEXT:    movzbl %bpl, %ebp
; AVX2-NEXT:    vpinsrb $1, %ebp, %xmm4, %xmm4
; AVX2-NEXT:    movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX2-NEXT:    movzbl %bpl, %ebp
; AVX2-NEXT:    vpinsrb $2, %ebp, %xmm4, %xmm4
; AVX2-NEXT:    movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX2-NEXT:    movzbl %bpl, %ebp
; AVX2-NEXT:    vpinsrb $3, %ebp, %xmm4, %xmm4
; AVX2-NEXT:    movl (%rsp), %ebp ## 4-byte Reload
; AVX2-NEXT:    movzbl %bpl, %ebp
; AVX2-NEXT:    vpinsrb $4, %ebp, %xmm4, %xmm4
; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX2-NEXT:    movzbl %bpl, %ebp
; AVX2-NEXT:    vpinsrb $5, %ebp, %xmm4, %xmm4
; AVX2-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT:    vmovd -{{[0-9]+}}(%rsp), %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    ## xmm5 = mem[0],zero,zero,zero
; AVX2-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT:    vmovd %r12d, %xmm6
; AVX2-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX2-NEXT:    vpinsrb $2, %r15d, %xmm6, %xmm6
; AVX2-NEXT:    vpinsrb $3, %r14d, %xmm6, %xmm6
; AVX2-NEXT:    vpinsrb $4, %ebx, %xmm6, %xmm6
; AVX2-NEXT:    vpinsrb $5, %r11d, %xmm6, %xmm6
; AVX2-NEXT:    vpinsrb $6, %r9d, %xmm6, %xmm6
; AVX2-NEXT:    vpinsrb $7, %esi, %xmm6, %xmm6
; AVX2-NEXT:    vpinsrb $8, %eax, %xmm6, %xmm6
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm6, %xmm6
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r9d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
; AVX2-NEXT:    vpinsrb $10, %edx, %xmm6, %xmm6
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
; AVX2-NEXT:    vpinsrb $11, %r8d, %xmm6, %xmm6
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT:    vpinsrb $12, %r10d, %xmm6, %xmm6
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
; AVX2-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm6, %xmm6
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; AVX2-NEXT:    vpinsrb $15, %r15d, %xmm6, %xmm6
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
; AVX2-NEXT:    vmovd %r12d, %xmm7
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
; AVX2-NEXT:    vpinsrb $1, %r9d, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $2, %r11d, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $3, %r14d, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $4, %r13d, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $6, %r8d, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $7, %ebx, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $8, %r10d, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $9, %ebp, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $10, %eax, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $11, %edi, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $12, %r15d, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $13, %esi, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $14, %r12d, %xmm7, %xmm7
; AVX2-NEXT:    vpinsrb $15, %edx, %xmm7, %xmm7
; AVX2-NEXT:    vinserti128 $1, %xmm5, %ymm4, %ymm4
; AVX2-NEXT:    vpsllw $7, %ymm4, %ymm4
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX2-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vinserti128 $1, %xmm7, %ymm6, %ymm2
; AVX2-NEXT:    vpsllw $7, %ymm2, %ymm2
; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    addq $8, %rsp
; AVX2-NEXT:    popq %rbx
; AVX2-NEXT:    popq %r12
; AVX2-NEXT:    popq %r13
; AVX2-NEXT:    popq %r14
; AVX2-NEXT:    popq %r15
; AVX2-NEXT:    popq %rbp
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_load_64xi8:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:  Ltmp0:
; AVX512F-NEXT:    .cfi_def_cfa_offset 16
; AVX512F-NEXT:    pushq %r15
; AVX512F-NEXT:  Ltmp1:
; AVX512F-NEXT:    .cfi_def_cfa_offset 24
; AVX512F-NEXT:    pushq %r14
; AVX512F-NEXT:  Ltmp2:
; AVX512F-NEXT:    .cfi_def_cfa_offset 32
; AVX512F-NEXT:    pushq %r13
; AVX512F-NEXT:  Ltmp3:
; AVX512F-NEXT:    .cfi_def_cfa_offset 40
; AVX512F-NEXT:    pushq %r12
; AVX512F-NEXT:  Ltmp4:
; AVX512F-NEXT:    .cfi_def_cfa_offset 48
; AVX512F-NEXT:    pushq %rbx
; AVX512F-NEXT:  Ltmp5:
; AVX512F-NEXT:    .cfi_def_cfa_offset 56
; AVX512F-NEXT:    subq $76, %rsp
; AVX512F-NEXT:  Ltmp6:
; AVX512F-NEXT:    .cfi_def_cfa_offset 132
; AVX512F-NEXT:  Ltmp7:
; AVX512F-NEXT:    .cfi_offset %rbx, -56
; AVX512F-NEXT:  Ltmp8:
; AVX512F-NEXT:    .cfi_offset %r12, -48
; AVX512F-NEXT:  Ltmp9:
; AVX512F-NEXT:    .cfi_offset %r13, -40
; AVX512F-NEXT:  Ltmp10:
; AVX512F-NEXT:    .cfi_offset %r14, -32
; AVX512F-NEXT:  Ltmp11:
; AVX512F-NEXT:    .cfi_offset %r15, -24
; AVX512F-NEXT:  Ltmp12:
; AVX512F-NEXT:    .cfi_offset %rbp, -16
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_2
; AVX512F-NEXT:  ## BB#1: ## %cond.load
; AVX512F-NEXT:    movzbl (%rdi), %eax
; AVX512F-NEXT:    vmovd %eax, %xmm0
; AVX512F-NEXT:  LBB52_2: ## %else
; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_4
; AVX512F-NEXT:  ## BB#3: ## %cond.load1
; AVX512F-NEXT:    vpinsrb $1, 1(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_4: ## %else2
; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_6
; AVX512F-NEXT:  ## BB#5: ## %cond.load4
; AVX512F-NEXT:    vpinsrb $2, 2(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_6: ## %else5
; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_8
; AVX512F-NEXT:  ## BB#7: ## %cond.load7
; AVX512F-NEXT:    vpinsrb $3, 3(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_8: ## %else8
; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_10
; AVX512F-NEXT:  ## BB#9: ## %cond.load10
; AVX512F-NEXT:    vpinsrb $4, 4(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_10: ## %else11
; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_12
; AVX512F-NEXT:  ## BB#11: ## %cond.load13
; AVX512F-NEXT:    vpinsrb $5, 5(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_12: ## %else14
; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_14
; AVX512F-NEXT:  ## BB#13: ## %cond.load16
; AVX512F-NEXT:    vpinsrb $6, 6(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_14: ## %else17
; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_16
; AVX512F-NEXT:  ## BB#15: ## %cond.load19
; AVX512F-NEXT:    vpinsrb $7, 7(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_16: ## %else20
; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_18
; AVX512F-NEXT:  ## BB#17: ## %cond.load22
; AVX512F-NEXT:    vpinsrb $8, 8(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_18: ## %else23
; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, (%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_20
; AVX512F-NEXT:  ## BB#19: ## %cond.load25
; AVX512F-NEXT:    vpinsrb $9, 9(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_20: ## %else26
; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_22
; AVX512F-NEXT:  ## BB#21: ## %cond.load28
; AVX512F-NEXT:    vpinsrb $10, 10(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_22: ## %else29
; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_24
; AVX512F-NEXT:  ## BB#23: ## %cond.load31
; AVX512F-NEXT:    vpinsrb $11, 11(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_24: ## %else32
; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_26
; AVX512F-NEXT:  ## BB#25: ## %cond.load34
; AVX512F-NEXT:    vpinsrb $12, 12(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_26: ## %else35
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_28
; AVX512F-NEXT:  ## BB#27: ## %cond.load37
; AVX512F-NEXT:    vpinsrb $13, 13(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_28: ## %else38
; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_30
; AVX512F-NEXT:  ## BB#29: ## %cond.load40
; AVX512F-NEXT:    vpinsrb $14, 14(%rdi), %xmm0, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_30: ## %else41
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_32
; AVX512F-NEXT:  ## BB#31: ## %cond.load43
; AVX512F-NEXT:    vpinsrb $15, 15(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_32: ## %else44
; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_34
; AVX512F-NEXT:  ## BB#33: ## %cond.load46
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $0, 16(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_34: ## %else47
; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_36
; AVX512F-NEXT:  ## BB#35: ## %cond.load49
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $1, 17(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_36: ## %else50
; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_38
; AVX512F-NEXT:  ## BB#37: ## %cond.load52
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $2, 18(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_38: ## %else53
; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_40
; AVX512F-NEXT:  ## BB#39: ## %cond.load55
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $3, 19(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_40: ## %else56
; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_42
; AVX512F-NEXT:  ## BB#41: ## %cond.load58
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $4, 20(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_42: ## %else59
; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_44
; AVX512F-NEXT:  ## BB#43: ## %cond.load61
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $5, 21(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_44: ## %else62
; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_46
; AVX512F-NEXT:  ## BB#45: ## %cond.load64
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $6, 22(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_46: ## %else65
; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_48
; AVX512F-NEXT:  ## BB#47: ## %cond.load67
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $7, 23(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_48: ## %else68
; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_50
; AVX512F-NEXT:  ## BB#49: ## %cond.load70
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $8, 24(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_50: ## %else71
; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_52
; AVX512F-NEXT:  ## BB#51: ## %cond.load73
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $9, 25(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_52: ## %else74
; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_54
; AVX512F-NEXT:  ## BB#53: ## %cond.load76
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $10, 26(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_54: ## %else77
; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_56
; AVX512F-NEXT:  ## BB#55: ## %cond.load79
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $11, 27(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_56: ## %else80
; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_58
; AVX512F-NEXT:  ## BB#57: ## %cond.load82
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $12, 28(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_58: ## %else83
; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm1
; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_60
; AVX512F-NEXT:  ## BB#59: ## %cond.load85
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512F-NEXT:    vpinsrb $13, 29(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_60: ## %else86
; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_62
; AVX512F-NEXT:  ## BB#61: ## %cond.load88
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512F-NEXT:    vpinsrb $14, 30(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_62: ## %else89
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT:    kshiftlw $0, %k1, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_64
; AVX512F-NEXT:  ## BB#63: ## %cond.load91
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $15, 31(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_64: ## %else92
; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_66
; AVX512F-NEXT:  ## BB#65: ## %cond.load94
; AVX512F-NEXT:    vpinsrb $0, 32(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_66: ## %else95
; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_68
; AVX512F-NEXT:  ## BB#67: ## %cond.load97
; AVX512F-NEXT:    vpinsrb $1, 33(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_68: ## %else98
; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_70
; AVX512F-NEXT:  ## BB#69: ## %cond.load100
; AVX512F-NEXT:    vpinsrb $2, 34(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_70: ## %else101
; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_72
; AVX512F-NEXT:  ## BB#71: ## %cond.load103
; AVX512F-NEXT:    vpinsrb $3, 35(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_72: ## %else104
; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_74
; AVX512F-NEXT:  ## BB#73: ## %cond.load106
; AVX512F-NEXT:    vpinsrb $4, 36(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_74: ## %else107
; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_76
; AVX512F-NEXT:  ## BB#75: ## %cond.load109
; AVX512F-NEXT:    vpinsrb $5, 37(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_76: ## %else110
; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_78
; AVX512F-NEXT:  ## BB#77: ## %cond.load112
; AVX512F-NEXT:    vpinsrb $6, 38(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_78: ## %else113
; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_80
; AVX512F-NEXT:  ## BB#79: ## %cond.load115
; AVX512F-NEXT:    vpinsrb $7, 39(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_80: ## %else116
; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_82
; AVX512F-NEXT:  ## BB#81: ## %cond.load118
; AVX512F-NEXT:    vpinsrb $8, 40(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_82: ## %else119
; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_84
; AVX512F-NEXT:  ## BB#83: ## %cond.load121
; AVX512F-NEXT:    vpinsrb $9, 41(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_84: ## %else122
; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_86
; AVX512F-NEXT:  ## BB#85: ## %cond.load124
; AVX512F-NEXT:    vpinsrb $10, 42(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_86: ## %else125
; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_88
; AVX512F-NEXT:  ## BB#87: ## %cond.load127
; AVX512F-NEXT:    vpinsrb $11, 43(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_88: ## %else128
; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_90
; AVX512F-NEXT:  ## BB#89: ## %cond.load130
; AVX512F-NEXT:    vpinsrb $12, 44(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_90: ## %else131
; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm2
; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_92
; AVX512F-NEXT:  ## BB#91: ## %cond.load133
; AVX512F-NEXT:    vpinsrb $13, 45(%rdi), %xmm1, %xmm3
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_92: ## %else134
; AVX512F-NEXT:    vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_94
; AVX512F-NEXT:  ## BB#93: ## %cond.load136
; AVX512F-NEXT:    vpinsrb $14, 46(%rdi), %xmm1, %xmm3
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_94: ## %else137
; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_96
; AVX512F-NEXT:  ## BB#95: ## %cond.load139
; AVX512F-NEXT:    vpinsrb $15, 47(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_96: ## %else140
; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_98
; AVX512F-NEXT:  ## BB#97: ## %cond.load142
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $0, 48(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_98: ## %else143
; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_100
; AVX512F-NEXT:  ## BB#99: ## %cond.load145
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $1, 49(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_100: ## %else146
; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_102
; AVX512F-NEXT:  ## BB#101: ## %cond.load148
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $2, 50(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_102: ## %else149
; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_104
; AVX512F-NEXT:  ## BB#103: ## %cond.load151
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $3, 51(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_104: ## %else152
; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_106
; AVX512F-NEXT:  ## BB#105: ## %cond.load154
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $4, 52(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_106: ## %else155
; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_108
; AVX512F-NEXT:  ## BB#107: ## %cond.load157
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $5, 53(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_108: ## %else158
; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_110
; AVX512F-NEXT:  ## BB#109: ## %cond.load160
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $6, 54(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_110: ## %else161
; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_112
; AVX512F-NEXT:  ## BB#111: ## %cond.load163
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $7, 55(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_112: ## %else164
; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_114
; AVX512F-NEXT:  ## BB#113: ## %cond.load166
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $8, 56(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_114: ## %else167
; AVX512F-NEXT:    kshiftlw $6, %k1, %k2
; AVX512F-NEXT:    kshiftrw $15, %k2, %k2
; AVX512F-NEXT:    kmovw %k2, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_116
; AVX512F-NEXT:  ## BB#115: ## %cond.load169
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $9, 57(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_116: ## %else170
; AVX512F-NEXT:    kshiftlw $5, %k1, %k3
; AVX512F-NEXT:    kshiftrw $15, %k3, %k3
; AVX512F-NEXT:    kmovw %k3, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_118
; AVX512F-NEXT:  ## BB#117: ## %cond.load172
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $10, 58(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_118: ## %else173
; AVX512F-NEXT:    kshiftlw $4, %k1, %k4
; AVX512F-NEXT:    kshiftrw $15, %k4, %k4
; AVX512F-NEXT:    kmovw %k4, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_120
 | 
						|
; AVX512F-NEXT:  ## BB#119: ## %cond.load175
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 | 
						|
; AVX512F-NEXT:    vpinsrb $11, 59(%rdi), %xmm2, %xmm2
 | 
						|
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 | 
						|
; AVX512F-NEXT:  LBB52_120: ## %else176
 | 
						|
; AVX512F-NEXT:    kshiftlw $3, %k1, %k5
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k5, %k5
 | 
						|
; AVX512F-NEXT:    kmovw %k5, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB52_122
 | 
						|
; AVX512F-NEXT:  ## BB#121: ## %cond.load178
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 | 
						|
; AVX512F-NEXT:    vpinsrb $12, 60(%rdi), %xmm2, %xmm2
 | 
						|
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 | 
						|
; AVX512F-NEXT:  LBB52_122: ## %else179
 | 
						|
; AVX512F-NEXT:    kshiftlw $2, %k1, %k6
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k6, %k6
 | 
						|
; AVX512F-NEXT:    kmovw %k6, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB52_124
 | 
						|
; AVX512F-NEXT:  ## BB#123: ## %cond.load181
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 | 
						|
; AVX512F-NEXT:    vpinsrb $13, 61(%rdi), %xmm2, %xmm2
 | 
						|
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 | 
						|
; AVX512F-NEXT:  LBB52_124: ## %else182
 | 
						|
; AVX512F-NEXT:    kshiftlw $1, %k1, %k7
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k7, %k7
 | 
						|
; AVX512F-NEXT:    kmovw %k7, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB52_126
 | 
						|
; AVX512F-NEXT:  ## BB#125: ## %cond.load184
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 | 
						|
; AVX512F-NEXT:    vpinsrb $14, 62(%rdi), %xmm2, %xmm2
 | 
						|
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 | 
						|
; AVX512F-NEXT:  LBB52_126: ## %else185
 | 
						|
; AVX512F-NEXT:    kshiftlw $0, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB52_128
 | 
						|
; AVX512F-NEXT:  ## BB#127: ## %cond.load187
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 | 
						|
; AVX512F-NEXT:    vpinsrb $15, 63(%rdi), %xmm2, %xmm2
 | 
						|
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 | 
						|
; AVX512F-NEXT:  LBB52_128: ## %else188
 | 
						|
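; NOTE: most of the lane tests above spill their extracted mask bit to the
; stack, while the last seven keep theirs live in %k2-%k7 and %k1. The long
; reload sequence that follows rematerializes all of those bits into GPRs and
; stack slots so the full mask can be rebuilt as byte vectors for the final
; vpblendvb merges with the pass-through value.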
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw (%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, (%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw %k2, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw %k3, %r12d
; AVX512F-NEXT:    kmovw %k4, %r15d
; AVX512F-NEXT:    kmovw %k5, %r14d
; AVX512F-NEXT:    kmovw %k6, %ebx
; AVX512F-NEXT:    kmovw %k7, %r11d
; AVX512F-NEXT:    kmovw %k1, %r10d
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r8d
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r9d
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %edi
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %esi
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %edx
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl -{{[0-9]+}}(%rsp), %r13d ## 4-byte Reload
; AVX512F-NEXT:    vmovd %r13d, %xmm2
; AVX512F-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX512F-NEXT:    vmovd %ebp, %xmm3
; AVX512F-NEXT:    vpinsrb $1, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $3, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $9, (%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX512F-NEXT:    vmovd %ebp, %xmm6
; AVX512F-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $9, {{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r13d
; AVX512F-NEXT:    vpinsrb $10, %r12d, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r12d
; AVX512F-NEXT:    vpinsrb $11, %r15d, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r15d
; AVX512F-NEXT:    vpinsrb $12, %r14d, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r14d
; AVX512F-NEXT:    vpinsrb $13, %ebx, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %ebx
; AVX512F-NEXT:    vpinsrb $14, %r11d, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r11d
; AVX512F-NEXT:    vpinsrb $15, %r10d, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r10d
; AVX512F-NEXT:    vmovd %r8d, %xmm7
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r8d
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpsllw $7, %ymm2, %ymm2
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512F-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
; AVX512F-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm7, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $2, %r9d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $3, %edi, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $4, %esi, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $5, %edx, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $6, %ecx, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $8, %r13d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $9, %r12d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $10, %r15d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $11, %r14d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $12, %ebx, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $13, %r11d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $14, %r10d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $15, %r8d, %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm2, %ymm2
; AVX512F-NEXT:    vpsllw $7, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
; AVX512F-NEXT:    addq $76, %rsp
; AVX512F-NEXT:    popq %rbx
; AVX512F-NEXT:    popq %r12
; AVX512F-NEXT:    popq %r13
; AVX512F-NEXT:    popq %r14
; AVX512F-NEXT:    popq %r15
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_load_64xi8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0
; SKX-NEXT:    vpmovb2m %zmm0, %k1
; SKX-NEXT:    vmovdqu8 (%rdi), %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* %addr, i32 4, <64 x i1>%mask, <64 x i8> %val)
  ret <64 x i8> %res
}
declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)

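; The AVX512F sequence above has no byte-granularity masked move (vmovdqu8
; requires AVX512BW, as the SKX run shows), so the load is scalarized into one
; test-and-branch per lane. As a rough illustration, the IR below sketches what
; a single lane of that expansion corresponds to. It is editorial only: the
; function name and labels are hypothetical and it is not covered by any
; FileCheck prefixes in this file.
define <64 x i8> @masked_load_lane0_sketch(<64 x i8>* %addr, <64 x i1> %mask, <64 x i8> %val) {
entry:
  %bit = extractelement <64 x i1> %mask, i64 0   ; test the mask bit for lane 0
  br i1 %bit, label %cond.load, label %else
cond.load:                                       ; load the lane only when it is enabled
  %ptr = bitcast <64 x i8>* %addr to i8*
  %elt = load i8, i8* %ptr, align 1
  %vec = insertelement <64 x i8> %val, i8 %elt, i64 0
  br label %else
else:                                            ; a disabled lane keeps the pass-through value
  %res = phi <64 x i8> [ %vec, %cond.load ], [ %val, %entry ]
  ret <64 x i8> %res
}
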
define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; AVX-LABEL: test_mask_load_8xi16:
; AVX:       ## BB#0:
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    ## implicit-def: %XMM1
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_2
; AVX-NEXT:  ## BB#1: ## %cond.load
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm1
; AVX-NEXT:  LBB53_2: ## %else
; AVX-NEXT:    vpextrb $2, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_4
; AVX-NEXT:  ## BB#3: ## %cond.load1
; AVX-NEXT:    vpinsrw $1, 2(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_4: ## %else2
; AVX-NEXT:    vpextrb $4, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_6
; AVX-NEXT:  ## BB#5: ## %cond.load4
; AVX-NEXT:    vpinsrw $2, 4(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_6: ## %else5
; AVX-NEXT:    vpextrb $6, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_8
; AVX-NEXT:  ## BB#7: ## %cond.load7
; AVX-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_8: ## %else8
; AVX-NEXT:    vpextrb $8, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_10
; AVX-NEXT:  ## BB#9: ## %cond.load10
; AVX-NEXT:    vpinsrw $4, 8(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_10: ## %else11
; AVX-NEXT:    vpextrb $10, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_12
; AVX-NEXT:  ## BB#11: ## %cond.load13
; AVX-NEXT:    vpinsrw $5, 10(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_12: ## %else14
; AVX-NEXT:    vpextrb $12, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_14
; AVX-NEXT:  ## BB#13: ## %cond.load16
; AVX-NEXT:    vpinsrw $6, 12(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_14: ## %else17
; AVX-NEXT:    vpextrb $14, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_16
; AVX-NEXT:  ## BB#15: ## %cond.load19
; AVX-NEXT:    vpinsrw $7, 14(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_16: ## %else20
; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: test_mask_load_8xi16:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    ## implicit-def: %XMM0
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_2
; AVX512F-NEXT:  ## BB#1: ## %cond.load
; AVX512F-NEXT:    movzwl (%rdi), %eax
; AVX512F-NEXT:    vmovd %eax, %xmm0
; AVX512F-NEXT:  LBB53_2: ## %else
; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_4
; AVX512F-NEXT:  ## BB#3: ## %cond.load1
; AVX512F-NEXT:    vpinsrw $1, 2(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_4: ## %else2
; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_6
; AVX512F-NEXT:  ## BB#5: ## %cond.load4
; AVX512F-NEXT:    vpinsrw $2, 4(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_6: ## %else5
; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_8
; AVX512F-NEXT:  ## BB#7: ## %cond.load7
; AVX512F-NEXT:    vpinsrw $3, 6(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_8: ## %else8
; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_10
; AVX512F-NEXT:  ## BB#9: ## %cond.load10
; AVX512F-NEXT:    vpinsrw $4, 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_10: ## %else11
; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_12
; AVX512F-NEXT:  ## BB#11: ## %cond.load13
; AVX512F-NEXT:    vpinsrw $5, 10(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_12: ## %else14
; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_14
; AVX512F-NEXT:  ## BB#13: ## %cond.load16
; AVX512F-NEXT:    vpinsrw $6, 12(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_14: ## %else17
; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_16
; AVX512F-NEXT:  ## BB#15: ## %cond.load19
; AVX512F-NEXT:    vpinsrw $7, 14(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_16: ## %else20
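; NOTE: AVX512F cannot move a mask register straight to a vector (vpmovm2w
; needs AVX512BW), so the tail below builds the <8 x i16> mask vector manually:
; vpternlogd $255 creates all-ones, the {%k1} {z} move zeroes the disabled
; lanes, and vpmovqw narrows the result, which is then applied with
; pand/pxor/por.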
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovqw %zmm1, %xmm1
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX512F-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_load_8xi16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vmovdqu16 (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)

define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; AVX1-LABEL: test_mask_load_16xi16:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    ## implicit-def: %YMM1
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_2
; AVX1-NEXT:  ## BB#1: ## %cond.load
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:  LBB54_2: ## %else
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_4
; AVX1-NEXT:  ## BB#3: ## %cond.load1
; AVX1-NEXT:    vpinsrw $1, 2(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_4: ## %else2
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_6
; AVX1-NEXT:  ## BB#5: ## %cond.load4
; AVX1-NEXT:    vpinsrw $2, 4(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_6: ## %else5
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_8
; AVX1-NEXT:  ## BB#7: ## %cond.load7
; AVX1-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_8: ## %else8
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_10
; AVX1-NEXT:  ## BB#9: ## %cond.load10
; AVX1-NEXT:    vpinsrw $4, 8(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_10: ## %else11
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_12
; AVX1-NEXT:  ## BB#11: ## %cond.load13
; AVX1-NEXT:    vpinsrw $5, 10(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_12: ## %else14
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_14
; AVX1-NEXT:  ## BB#13: ## %cond.load16
; AVX1-NEXT:    vpinsrw $6, 12(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_14: ## %else17
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_16
; AVX1-NEXT:  ## BB#15: ## %cond.load19
; AVX1-NEXT:    vpinsrw $7, 14(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_16: ## %else20
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_18
; AVX1-NEXT:  ## BB#17: ## %cond.load22
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $0, 16(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_18: ## %else23
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_20
; AVX1-NEXT:  ## BB#19: ## %cond.load25
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $1, 18(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_20: ## %else26
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_22
; AVX1-NEXT:  ## BB#21: ## %cond.load28
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $2, 20(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_22: ## %else29
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_24
; AVX1-NEXT:  ## BB#23: ## %cond.load31
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $3, 22(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_24: ## %else32
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_26
; AVX1-NEXT:  ## BB#25: ## %cond.load34
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $4, 24(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_26: ## %else35
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_28
; AVX1-NEXT:  ## BB#27: ## %cond.load37
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $5, 26(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_28: ## %else38
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_30
; AVX1-NEXT:  ## BB#29: ## %cond.load40
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $6, 28(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_30: ## %else41
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_32
; AVX1-NEXT:  ## BB#31: ## %cond.load43
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $7, 30(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_32: ## %else44
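; NOTE: with no mask registers available, the AVX1 epilogue widens the
; <16 x i1> mask to words in two xmm halves (vpmovzxbw for the low half,
; vpunpckhbw for the high half), turns each lane into all-ones or all-zeros
; with a shift-left/arithmetic-shift-right by 15, and finishes with a single
; vandps, which suffices here because the pass-through value is
; zeroinitializer.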
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vpsllw $15, %xmm2, %xmm2
; AVX1-NEXT:    vpsraw $15, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_load_16xi16:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    ## implicit-def: %YMM1
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_2
; AVX2-NEXT:  ## BB#1: ## %cond.load
; AVX2-NEXT:    movzwl (%rdi), %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:  LBB54_2: ## %else
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_4
; AVX2-NEXT:  ## BB#3: ## %cond.load1
; AVX2-NEXT:    vpinsrw $1, 2(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_4: ## %else2
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_6
; AVX2-NEXT:  ## BB#5: ## %cond.load4
; AVX2-NEXT:    vpinsrw $2, 4(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_6: ## %else5
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_8
; AVX2-NEXT:  ## BB#7: ## %cond.load7
; AVX2-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_8: ## %else8
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_10
; AVX2-NEXT:  ## BB#9: ## %cond.load10
; AVX2-NEXT:    vpinsrw $4, 8(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_10: ## %else11
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_12
; AVX2-NEXT:  ## BB#11: ## %cond.load13
; AVX2-NEXT:    vpinsrw $5, 10(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_12: ## %else14
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_14
; AVX2-NEXT:  ## BB#13: ## %cond.load16
; AVX2-NEXT:    vpinsrw $6, 12(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_14: ## %else17
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_16
; AVX2-NEXT:  ## BB#15: ## %cond.load19
; AVX2-NEXT:    vpinsrw $7, 14(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_16: ## %else20
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_18
; AVX2-NEXT:  ## BB#17: ## %cond.load22
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $0, 16(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_18: ## %else23
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_20
; AVX2-NEXT:  ## BB#19: ## %cond.load25
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $1, 18(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_20: ## %else26
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_22
; AVX2-NEXT:  ## BB#21: ## %cond.load28
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $2, 20(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_22: ## %else29
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_24
; AVX2-NEXT:  ## BB#23: ## %cond.load31
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $3, 22(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_24: ## %else32
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_26
; AVX2-NEXT:  ## BB#25: ## %cond.load34
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $4, 24(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_26: ## %else35
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_28
; AVX2-NEXT:  ## BB#27: ## %cond.load37
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $5, 26(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_28: ## %else38
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_30
; AVX2-NEXT:  ## BB#29: ## %cond.load40
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $6, 28(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_30: ## %else41
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_32
; AVX2-NEXT:  ## BB#31: ## %cond.load43
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $7, 30(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_32: ## %else44
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_load_16xi16:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    ## implicit-def: %YMM0
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_2
; AVX512F-NEXT:  ## BB#1: ## %cond.load
; AVX512F-NEXT:    movzwl (%rdi), %eax
; AVX512F-NEXT:    vmovd %eax, %xmm0
; AVX512F-NEXT:  LBB54_2: ## %else
; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_4
; AVX512F-NEXT:  ## BB#3: ## %cond.load1
; AVX512F-NEXT:    vpinsrw $1, 2(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_4: ## %else2
; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_6
; AVX512F-NEXT:  ## BB#5: ## %cond.load4
; AVX512F-NEXT:    vpinsrw $2, 4(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_6: ## %else5
; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_8
; AVX512F-NEXT:  ## BB#7: ## %cond.load7
; AVX512F-NEXT:    vpinsrw $3, 6(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_8: ## %else8
; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_10
; AVX512F-NEXT:  ## BB#9: ## %cond.load10
; AVX512F-NEXT:    vpinsrw $4, 8(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_10: ## %else11
; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_12
; AVX512F-NEXT:  ## BB#11: ## %cond.load13
; AVX512F-NEXT:    vpinsrw $5, 10(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_12: ## %else14
; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_14
; AVX512F-NEXT:  ## BB#13: ## %cond.load16
; AVX512F-NEXT:    vpinsrw $6, 12(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_14: ## %else17
; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_16
; AVX512F-NEXT:  ## BB#15: ## %cond.load19
; AVX512F-NEXT:    vpinsrw $7, 14(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_16: ## %else20
; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_18
; AVX512F-NEXT:  ## BB#17: ## %cond.load22
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $0, 16(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_18: ## %else23
; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_20
; AVX512F-NEXT:  ## BB#19: ## %cond.load25
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $1, 18(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_20: ## %else26
; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_22
; AVX512F-NEXT:  ## BB#21: ## %cond.load28
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $2, 20(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_22: ## %else29
; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_24
; AVX512F-NEXT:  ## BB#23: ## %cond.load31
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $3, 22(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_24: ## %else32
; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_26
; AVX512F-NEXT:  ## BB#25: ## %cond.load34
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $4, 24(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_26: ## %else35
; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_28
; AVX512F-NEXT:  ## BB#27: ## %cond.load37
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $5, 26(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_28: ## %else38
; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_30
; AVX512F-NEXT:  ## BB#29: ## %cond.load40
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $6, 28(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_30: ## %else41
; AVX512F-NEXT:    kshiftlw $0, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_32
; AVX512F-NEXT:  ## BB#31: ## %cond.load43
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $7, 30(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_32: ## %else44
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_load_16xi16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vmovdqu16 (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)

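; NOTE: the <32 x i16> case below follows the same scalarized pattern at twice
; the width: without a masked word move, the pre-AVX512BW runs emit one
; test-and-branch per element, each conditionally inserting a lane with
; vpinsrw.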
define <32 x i16> @test_mask_load_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
; AVX1-LABEL: test_mask_load_32xi16:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_2
; AVX1-NEXT:  ## BB#1: ## %cond.load
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm3
; AVX1-NEXT:  LBB55_2: ## %else
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_4
; AVX1-NEXT:  ## BB#3: ## %cond.load1
; AVX1-NEXT:    vpinsrw $1, 2(%rdi), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB55_4: ## %else2
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_6
; AVX1-NEXT:  ## BB#5: ## %cond.load4
; AVX1-NEXT:    vpinsrw $2, 4(%rdi), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB55_6: ## %else5
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_8
; AVX1-NEXT:  ## BB#7: ## %cond.load7
; AVX1-NEXT:    vpinsrw $3, 6(%rdi), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB55_8: ## %else8
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_10
; AVX1-NEXT:  ## BB#9: ## %cond.load10
; AVX1-NEXT:    vpinsrw $4, 8(%rdi), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB55_10: ## %else11
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_12
; AVX1-NEXT:  ## BB#11: ## %cond.load13
; AVX1-NEXT:    vpinsrw $5, 10(%rdi), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB55_12: ## %else14
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_14
; AVX1-NEXT:  ## BB#13: ## %cond.load16
; AVX1-NEXT:    vpinsrw $6, 12(%rdi), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB55_14: ## %else17
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_16
; AVX1-NEXT:  ## BB#15: ## %cond.load19
; AVX1-NEXT:    vpinsrw $7, 14(%rdi), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB55_16: ## %else20
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_18
; AVX1-NEXT:  ## BB#17: ## %cond.load22
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrw $0, 16(%rdi), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB55_18: ## %else23
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_20
; AVX1-NEXT:  ## BB#19: ## %cond.load25
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrw $1, 18(%rdi), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB55_20: ## %else26
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_22
; AVX1-NEXT:  ## BB#21: ## %cond.load28
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrw $2, 20(%rdi), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB55_22: ## %else29
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_24
; AVX1-NEXT:  ## BB#23: ## %cond.load31
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrw $3, 22(%rdi), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB55_24: ## %else32
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_26
; AVX1-NEXT:  ## BB#25: ## %cond.load34
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrw $4, 24(%rdi), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB55_26: ## %else35
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_28
; AVX1-NEXT:  ## BB#27: ## %cond.load37
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrw $5, 26(%rdi), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB55_28: ## %else38
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_30
; AVX1-NEXT:  ## BB#29: ## %cond.load40
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrw $6, 28(%rdi), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB55_30: ## %else41
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_32
; AVX1-NEXT:  ## BB#31: ## %cond.load43
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrw $7, 30(%rdi), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB55_32: ## %else44
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpextrb $0, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_34
; AVX1-NEXT:  ## BB#33: ## %cond.load46
; AVX1-NEXT:    vpinsrw $0, 32(%rdi), %xmm0, %xmm5
; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:  LBB55_34: ## %else47
; AVX1-NEXT:    vpextrb $1, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_36
; AVX1-NEXT:  ## BB#35: ## %cond.load49
; AVX1-NEXT:    vpinsrw $1, 34(%rdi), %xmm5, %xmm6
; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX1-NEXT:  LBB55_36: ## %else50
; AVX1-NEXT:    vpextrb $2, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_38
; AVX1-NEXT:  ## BB#37: ## %cond.load52
; AVX1-NEXT:    vpinsrw $2, 36(%rdi), %xmm5, %xmm6
; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX1-NEXT:  LBB55_38: ## %else53
; AVX1-NEXT:    vpextrb $3, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_40
; AVX1-NEXT:  ## BB#39: ## %cond.load55
; AVX1-NEXT:    vpinsrw $3, 38(%rdi), %xmm5, %xmm6
; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX1-NEXT:  LBB55_40: ## %else56
; AVX1-NEXT:    vpextrb $4, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_42
; AVX1-NEXT:  ## BB#41: ## %cond.load58
; AVX1-NEXT:    vpinsrw $4, 40(%rdi), %xmm5, %xmm6
; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX1-NEXT:  LBB55_42: ## %else59
; AVX1-NEXT:    vpextrb $5, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_44
; AVX1-NEXT:  ## BB#43: ## %cond.load61
; AVX1-NEXT:    vpinsrw $5, 42(%rdi), %xmm5, %xmm6
; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX1-NEXT:  LBB55_44: ## %else62
; AVX1-NEXT:    vpextrb $6, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_46
; AVX1-NEXT:  ## BB#45: ## %cond.load64
; AVX1-NEXT:    vpinsrw $6, 44(%rdi), %xmm5, %xmm6
; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX1-NEXT:  LBB55_46: ## %else65
; AVX1-NEXT:    vpextrb $7, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_48
; AVX1-NEXT:  ## BB#47: ## %cond.load67
; AVX1-NEXT:    vpinsrw $7, 46(%rdi), %xmm5, %xmm6
; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX1-NEXT:  LBB55_48: ## %else68
; AVX1-NEXT:    vpextrb $8, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_50
; AVX1-NEXT:  ## BB#49: ## %cond.load70
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
; AVX1-NEXT:    vpinsrw $0, 48(%rdi), %xmm6, %xmm6
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT:  LBB55_50: ## %else71
; AVX1-NEXT:    vpextrb $9, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_52
; AVX1-NEXT:  ## BB#51: ## %cond.load73
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
; AVX1-NEXT:    vpinsrw $1, 50(%rdi), %xmm6, %xmm6
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT:  LBB55_52: ## %else74
; AVX1-NEXT:    vpextrb $10, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_54
; AVX1-NEXT:  ## BB#53: ## %cond.load76
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
; AVX1-NEXT:    vpinsrw $2, 52(%rdi), %xmm6, %xmm6
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT:  LBB55_54: ## %else77
; AVX1-NEXT:    vpextrb $11, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_56
; AVX1-NEXT:  ## BB#55: ## %cond.load79
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
; AVX1-NEXT:    vpinsrw $3, 54(%rdi), %xmm6, %xmm6
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT:  LBB55_56: ## %else80
; AVX1-NEXT:    vpextrb $12, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_58
; AVX1-NEXT:  ## BB#57: ## %cond.load82
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
; AVX1-NEXT:    vpinsrw $4, 56(%rdi), %xmm6, %xmm6
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT:  LBB55_58: ## %else83
; AVX1-NEXT:    vpextrb $13, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_60
; AVX1-NEXT:  ## BB#59: ## %cond.load85
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
; AVX1-NEXT:    vpinsrw $5, 58(%rdi), %xmm6, %xmm6
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT:  LBB55_60: ## %else86
; AVX1-NEXT:    vpextrb $14, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_62
; AVX1-NEXT:  ## BB#61: ## %cond.load88
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
; AVX1-NEXT:    vpinsrw $6, 60(%rdi), %xmm6, %xmm6
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT:  LBB55_62: ## %else89
; AVX1-NEXT:    vpextrb $15, %xmm4, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB55_64
; AVX1-NEXT:  ## BB#63: ## %cond.load91
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
; AVX1-NEXT:    vpinsrw $7, 62(%rdi), %xmm6, %xmm6
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT:  LBB55_64: ## %else92
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vpsllw $15, %xmm6, %xmm6
; AVX1-NEXT:    vpsraw $15, %xmm6, %xmm6
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm6, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vandps %ymm0, %ymm3, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
; AVX1-NEXT:    vpsllw $15, %xmm1, %xmm1
; AVX1-NEXT:    vpsraw $15, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT:    vpsllw $15, %xmm3, %xmm3
; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:    vandnps %ymm2, %ymm1, %ymm2
; AVX1-NEXT:    vandps %ymm1, %ymm5, %ymm1
; AVX1-NEXT:    vorps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_load_32xi16:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_2
; AVX2-NEXT:  ## BB#1: ## %cond.load
; AVX2-NEXT:    movzwl (%rdi), %eax
; AVX2-NEXT:    vmovd %eax, %xmm3
; AVX2-NEXT:  LBB55_2: ## %else
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_4
; AVX2-NEXT:  ## BB#3: ## %cond.load1
; AVX2-NEXT:    vpinsrw $1, 2(%rdi), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB55_4: ## %else2
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_6
; AVX2-NEXT:  ## BB#5: ## %cond.load4
; AVX2-NEXT:    vpinsrw $2, 4(%rdi), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB55_6: ## %else5
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_8
; AVX2-NEXT:  ## BB#7: ## %cond.load7
; AVX2-NEXT:    vpinsrw $3, 6(%rdi), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB55_8: ## %else8
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_10
; AVX2-NEXT:  ## BB#9: ## %cond.load10
; AVX2-NEXT:    vpinsrw $4, 8(%rdi), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB55_10: ## %else11
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_12
; AVX2-NEXT:  ## BB#11: ## %cond.load13
; AVX2-NEXT:    vpinsrw $5, 10(%rdi), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB55_12: ## %else14
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_14
; AVX2-NEXT:  ## BB#13: ## %cond.load16
; AVX2-NEXT:    vpinsrw $6, 12(%rdi), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB55_14: ## %else17
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_16
; AVX2-NEXT:  ## BB#15: ## %cond.load19
; AVX2-NEXT:    vpinsrw $7, 14(%rdi), %xmm3, %xmm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT:  LBB55_16: ## %else20
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_18
; AVX2-NEXT:  ## BB#17: ## %cond.load22
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrw $0, 16(%rdi), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB55_18: ## %else23
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_20
; AVX2-NEXT:  ## BB#19: ## %cond.load25
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrw $1, 18(%rdi), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB55_20: ## %else26
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_22
; AVX2-NEXT:  ## BB#21: ## %cond.load28
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrw $2, 20(%rdi), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB55_22: ## %else29
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_24
; AVX2-NEXT:  ## BB#23: ## %cond.load31
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrw $3, 22(%rdi), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB55_24: ## %else32
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_26
; AVX2-NEXT:  ## BB#25: ## %cond.load34
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrw $4, 24(%rdi), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB55_26: ## %else35
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_28
; AVX2-NEXT:  ## BB#27: ## %cond.load37
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrw $5, 26(%rdi), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB55_28: ## %else38
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_30
; AVX2-NEXT:  ## BB#29: ## %cond.load40
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrw $6, 28(%rdi), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB55_30: ## %else41
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_32
; AVX2-NEXT:  ## BB#31: ## %cond.load43
; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT:    vpinsrw $7, 30(%rdi), %xmm4, %xmm4
; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT:  LBB55_32: ## %else44
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
; AVX2-NEXT:    vpextrb $0, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_34
; AVX2-NEXT:  ## BB#33: ## %cond.load46
; AVX2-NEXT:    vpinsrw $0, 32(%rdi), %xmm0, %xmm5
; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:  LBB55_34: ## %else47
; AVX2-NEXT:    vpextrb $1, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_36
; AVX2-NEXT:  ## BB#35: ## %cond.load49
; AVX2-NEXT:    vpinsrw $1, 34(%rdi), %xmm5, %xmm6
; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX2-NEXT:  LBB55_36: ## %else50
; AVX2-NEXT:    vpextrb $2, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_38
; AVX2-NEXT:  ## BB#37: ## %cond.load52
; AVX2-NEXT:    vpinsrw $2, 36(%rdi), %xmm5, %xmm6
; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX2-NEXT:  LBB55_38: ## %else53
; AVX2-NEXT:    vpextrb $3, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_40
; AVX2-NEXT:  ## BB#39: ## %cond.load55
; AVX2-NEXT:    vpinsrw $3, 38(%rdi), %xmm5, %xmm6
; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX2-NEXT:  LBB55_40: ## %else56
; AVX2-NEXT:    vpextrb $4, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_42
; AVX2-NEXT:  ## BB#41: ## %cond.load58
; AVX2-NEXT:    vpinsrw $4, 40(%rdi), %xmm5, %xmm6
; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX2-NEXT:  LBB55_42: ## %else59
; AVX2-NEXT:    vpextrb $5, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_44
; AVX2-NEXT:  ## BB#43: ## %cond.load61
; AVX2-NEXT:    vpinsrw $5, 42(%rdi), %xmm5, %xmm6
; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX2-NEXT:  LBB55_44: ## %else62
; AVX2-NEXT:    vpextrb $6, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_46
; AVX2-NEXT:  ## BB#45: ## %cond.load64
; AVX2-NEXT:    vpinsrw $6, 44(%rdi), %xmm5, %xmm6
; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX2-NEXT:  LBB55_46: ## %else65
; AVX2-NEXT:    vpextrb $7, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_48
; AVX2-NEXT:  ## BB#47: ## %cond.load67
; AVX2-NEXT:    vpinsrw $7, 46(%rdi), %xmm5, %xmm6
; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX2-NEXT:  LBB55_48: ## %else68
; AVX2-NEXT:    vpextrb $8, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_50
; AVX2-NEXT:  ## BB#49: ## %cond.load70
; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX2-NEXT:    vpinsrw $0, 48(%rdi), %xmm6, %xmm6
; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX2-NEXT:  LBB55_50: ## %else71
; AVX2-NEXT:    vpextrb $9, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_52
; AVX2-NEXT:  ## BB#51: ## %cond.load73
; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX2-NEXT:    vpinsrw $1, 50(%rdi), %xmm6, %xmm6
; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX2-NEXT:  LBB55_52: ## %else74
; AVX2-NEXT:    vpextrb $10, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_54
; AVX2-NEXT:  ## BB#53: ## %cond.load76
; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX2-NEXT:    vpinsrw $2, 52(%rdi), %xmm6, %xmm6
; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX2-NEXT:  LBB55_54: ## %else77
; AVX2-NEXT:    vpextrb $11, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_56
; AVX2-NEXT:  ## BB#55: ## %cond.load79
; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX2-NEXT:    vpinsrw $3, 54(%rdi), %xmm6, %xmm6
; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX2-NEXT:  LBB55_56: ## %else80
; AVX2-NEXT:    vpextrb $12, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_58
; AVX2-NEXT:  ## BB#57: ## %cond.load82
; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX2-NEXT:    vpinsrw $4, 56(%rdi), %xmm6, %xmm6
; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX2-NEXT:  LBB55_58: ## %else83
; AVX2-NEXT:    vpextrb $13, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_60
; AVX2-NEXT:  ## BB#59: ## %cond.load85
; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX2-NEXT:    vpinsrw $5, 58(%rdi), %xmm6, %xmm6
; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX2-NEXT:  LBB55_60: ## %else86
; AVX2-NEXT:    vpextrb $14, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_62
; AVX2-NEXT:  ## BB#61: ## %cond.load88
; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX2-NEXT:    vpinsrw $6, 60(%rdi), %xmm6, %xmm6
; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX2-NEXT:  LBB55_62: ## %else89
; AVX2-NEXT:    vpextrb $15, %xmm4, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB55_64
; AVX2-NEXT:  ## BB#63: ## %cond.load91
; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX2-NEXT:    vpinsrw $7, 62(%rdi), %xmm6, %xmm6
; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX2-NEXT:  LBB55_64: ## %else92
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm3, %ymm1, %ymm0
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; AVX2-NEXT:    vpsllw $15, %ymm1, %ymm1
; AVX2-NEXT:    vpsraw $15, %ymm1, %ymm1
; AVX2-NEXT:    vpblendvb %ymm1, %ymm5, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_load_32xi16:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_2
; AVX512F-NEXT:  ## BB#1: ## %cond.load
; AVX512F-NEXT:    movzwl (%rdi), %eax
; AVX512F-NEXT:    vmovd %eax, %xmm3
; AVX512F-NEXT:  LBB55_2: ## %else
; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_4
; AVX512F-NEXT:  ## BB#3: ## %cond.load1
; AVX512F-NEXT:    vpinsrw $1, 2(%rdi), %xmm3, %xmm4
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT:  LBB55_4: ## %else2
; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_6
; AVX512F-NEXT:  ## BB#5: ## %cond.load4
; AVX512F-NEXT:    vpinsrw $2, 4(%rdi), %xmm3, %xmm4
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT:  LBB55_6: ## %else5
; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_8
; AVX512F-NEXT:  ## BB#7: ## %cond.load7
; AVX512F-NEXT:    vpinsrw $3, 6(%rdi), %xmm3, %xmm4
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT:  LBB55_8: ## %else8
; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_10
; AVX512F-NEXT:  ## BB#9: ## %cond.load10
; AVX512F-NEXT:    vpinsrw $4, 8(%rdi), %xmm3, %xmm4
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT:  LBB55_10: ## %else11
; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_12
; AVX512F-NEXT:  ## BB#11: ## %cond.load13
; AVX512F-NEXT:    vpinsrw $5, 10(%rdi), %xmm3, %xmm4
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT:  LBB55_12: ## %else14
; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_14
; AVX512F-NEXT:  ## BB#13: ## %cond.load16
; AVX512F-NEXT:    vpinsrw $6, 12(%rdi), %xmm3, %xmm4
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT:  LBB55_14: ## %else17
; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_16
; AVX512F-NEXT:  ## BB#15: ## %cond.load19
; AVX512F-NEXT:    vpinsrw $7, 14(%rdi), %xmm3, %xmm4
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT:  LBB55_16: ## %else20
; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_18
; AVX512F-NEXT:  ## BB#17: ## %cond.load22
; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT:    vpinsrw $0, 16(%rdi), %xmm4, %xmm4
; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT:  LBB55_18: ## %else23
; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_20
; AVX512F-NEXT:  ## BB#19: ## %cond.load25
; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT:    vpinsrw $1, 18(%rdi), %xmm4, %xmm4
; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT:  LBB55_20: ## %else26
; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_22
; AVX512F-NEXT:  ## BB#21: ## %cond.load28
; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT:    vpinsrw $2, 20(%rdi), %xmm4, %xmm4
; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT:  LBB55_22: ## %else29
; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_24
; AVX512F-NEXT:  ## BB#23: ## %cond.load31
; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT:    vpinsrw $3, 22(%rdi), %xmm4, %xmm4
; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT:  LBB55_24: ## %else32
; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_26
; AVX512F-NEXT:  ## BB#25: ## %cond.load34
; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT:    vpinsrw $4, 24(%rdi), %xmm4, %xmm4
; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT:  LBB55_26: ## %else35
; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_28
; AVX512F-NEXT:  ## BB#27: ## %cond.load37
; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT:    vpinsrw $5, 26(%rdi), %xmm4, %xmm4
; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT:  LBB55_28: ## %else38
; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_30
; AVX512F-NEXT:  ## BB#29: ## %cond.load40
; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT:    vpinsrw $6, 28(%rdi), %xmm4, %xmm4
; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT:  LBB55_30: ## %else41
; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_32
; AVX512F-NEXT:  ## BB#31: ## %cond.load43
; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT:    vpinsrw $7, 30(%rdi), %xmm4, %xmm4
; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT:  LBB55_32: ## %else44
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm4
; AVX512F-NEXT:    vpextrb $0, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_34
; AVX512F-NEXT:  ## BB#33: ## %cond.load46
; AVX512F-NEXT:    vpinsrw $0, 32(%rdi), %xmm0, %xmm5
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB55_34: ## %else47
; AVX512F-NEXT:    vpextrb $1, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_36
; AVX512F-NEXT:  ## BB#35: ## %cond.load49
; AVX512F-NEXT:    vpinsrw $1, 34(%rdi), %xmm5, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT:  LBB55_36: ## %else50
; AVX512F-NEXT:    vpextrb $2, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_38
; AVX512F-NEXT:  ## BB#37: ## %cond.load52
; AVX512F-NEXT:    vpinsrw $2, 36(%rdi), %xmm5, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT:  LBB55_38: ## %else53
; AVX512F-NEXT:    vpextrb $3, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_40
; AVX512F-NEXT:  ## BB#39: ## %cond.load55
; AVX512F-NEXT:    vpinsrw $3, 38(%rdi), %xmm5, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT:  LBB55_40: ## %else56
; AVX512F-NEXT:    vpextrb $4, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_42
; AVX512F-NEXT:  ## BB#41: ## %cond.load58
; AVX512F-NEXT:    vpinsrw $4, 40(%rdi), %xmm5, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT:  LBB55_42: ## %else59
; AVX512F-NEXT:    vpextrb $5, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_44
; AVX512F-NEXT:  ## BB#43: ## %cond.load61
; AVX512F-NEXT:    vpinsrw $5, 42(%rdi), %xmm5, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT:  LBB55_44: ## %else62
; AVX512F-NEXT:    vpextrb $6, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_46
; AVX512F-NEXT:  ## BB#45: ## %cond.load64
; AVX512F-NEXT:    vpinsrw $6, 44(%rdi), %xmm5, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT:  LBB55_46: ## %else65
; AVX512F-NEXT:    vpextrb $7, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_48
; AVX512F-NEXT:  ## BB#47: ## %cond.load67
; AVX512F-NEXT:    vpinsrw $7, 46(%rdi), %xmm5, %xmm6
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT:  LBB55_48: ## %else68
; AVX512F-NEXT:    vpextrb $8, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_50
; AVX512F-NEXT:  ## BB#49: ## %cond.load70
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT:    vpinsrw $0, 48(%rdi), %xmm6, %xmm6
; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT:  LBB55_50: ## %else71
; AVX512F-NEXT:    vpextrb $9, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_52
; AVX512F-NEXT:  ## BB#51: ## %cond.load73
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT:    vpinsrw $1, 50(%rdi), %xmm6, %xmm6
; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT:  LBB55_52: ## %else74
; AVX512F-NEXT:    vpextrb $10, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_54
; AVX512F-NEXT:  ## BB#53: ## %cond.load76
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT:    vpinsrw $2, 52(%rdi), %xmm6, %xmm6
; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT:  LBB55_54: ## %else77
; AVX512F-NEXT:    vpextrb $11, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_56
; AVX512F-NEXT:  ## BB#55: ## %cond.load79
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT:    vpinsrw $3, 54(%rdi), %xmm6, %xmm6
; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT:  LBB55_56: ## %else80
; AVX512F-NEXT:    vpextrb $12, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_58
; AVX512F-NEXT:  ## BB#57: ## %cond.load82
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT:    vpinsrw $4, 56(%rdi), %xmm6, %xmm6
; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT:  LBB55_58: ## %else83
; AVX512F-NEXT:    vpextrb $13, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_60
; AVX512F-NEXT:  ## BB#59: ## %cond.load85
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT:    vpinsrw $5, 58(%rdi), %xmm6, %xmm6
; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT:  LBB55_60: ## %else86
; AVX512F-NEXT:    vpextrb $14, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_62
; AVX512F-NEXT:  ## BB#61: ## %cond.load88
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT:    vpinsrw $6, 60(%rdi), %xmm6, %xmm6
; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT:  LBB55_62: ## %else89
; AVX512F-NEXT:    vpextrb $15, %xmm4, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB55_64
; AVX512F-NEXT:  ## BB#63: ## %cond.load91
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT:    vpinsrw $7, 62(%rdi), %xmm6, %xmm6
; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT:  LBB55_64: ## %else92
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX512F-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm3, %ymm1, %ymm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; AVX512F-NEXT:    vpsllw $15, %ymm1, %ymm1
; AVX512F-NEXT:    vpsraw $15, %ymm1, %ymm1
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm5, %ymm2, %ymm1
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_load_32xi16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k1
; SKX-NEXT:    vmovdqu16 (%rdi), %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* %addr, i32 4, <32 x i1>%mask, <32 x i16> %val)
  ret <32 x i16> %res
}
declare <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>)

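; Masked byte stores are likewise scalarized below AVX512BW: plain AVX extracts
; and tests each mask lane and stores the corresponding byte with vpextrb,
; while AVX512F routes the mask through the k-registers (kshiftlw/kshiftrw/
; kmovw) before doing the same per-byte stores. SKX needs only vpmovb2m and a
; masked vmovdqu8.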
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; AVX-LABEL: test_mask_store_16xi8:
; AVX:       ## BB#0:
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_2
; AVX-NEXT:  ## BB#1: ## %cond.store
; AVX-NEXT:    vpextrb $0, %xmm1, (%rdi)
; AVX-NEXT:  LBB56_2: ## %else
; AVX-NEXT:    vpextrb $1, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_4
; AVX-NEXT:  ## BB#3: ## %cond.store1
; AVX-NEXT:    vpextrb $1, %xmm1, 1(%rdi)
; AVX-NEXT:  LBB56_4: ## %else2
; AVX-NEXT:    vpextrb $2, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_6
; AVX-NEXT:  ## BB#5: ## %cond.store3
; AVX-NEXT:    vpextrb $2, %xmm1, 2(%rdi)
; AVX-NEXT:  LBB56_6: ## %else4
; AVX-NEXT:    vpextrb $3, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_8
; AVX-NEXT:  ## BB#7: ## %cond.store5
; AVX-NEXT:    vpextrb $3, %xmm1, 3(%rdi)
; AVX-NEXT:  LBB56_8: ## %else6
; AVX-NEXT:    vpextrb $4, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_10
; AVX-NEXT:  ## BB#9: ## %cond.store7
; AVX-NEXT:    vpextrb $4, %xmm1, 4(%rdi)
; AVX-NEXT:  LBB56_10: ## %else8
; AVX-NEXT:    vpextrb $5, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_12
; AVX-NEXT:  ## BB#11: ## %cond.store9
; AVX-NEXT:    vpextrb $5, %xmm1, 5(%rdi)
; AVX-NEXT:  LBB56_12: ## %else10
; AVX-NEXT:    vpextrb $6, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_14
; AVX-NEXT:  ## BB#13: ## %cond.store11
; AVX-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
; AVX-NEXT:  LBB56_14: ## %else12
; AVX-NEXT:    vpextrb $7, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_16
; AVX-NEXT:  ## BB#15: ## %cond.store13
; AVX-NEXT:    vpextrb $7, %xmm1, 7(%rdi)
; AVX-NEXT:  LBB56_16: ## %else14
; AVX-NEXT:    vpextrb $8, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_18
; AVX-NEXT:  ## BB#17: ## %cond.store15
; AVX-NEXT:    vpextrb $8, %xmm1, 8(%rdi)
; AVX-NEXT:  LBB56_18: ## %else16
; AVX-NEXT:    vpextrb $9, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_20
; AVX-NEXT:  ## BB#19: ## %cond.store17
; AVX-NEXT:    vpextrb $9, %xmm1, 9(%rdi)
; AVX-NEXT:  LBB56_20: ## %else18
; AVX-NEXT:    vpextrb $10, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_22
; AVX-NEXT:  ## BB#21: ## %cond.store19
; AVX-NEXT:    vpextrb $10, %xmm1, 10(%rdi)
; AVX-NEXT:  LBB56_22: ## %else20
; AVX-NEXT:    vpextrb $11, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_24
; AVX-NEXT:  ## BB#23: ## %cond.store21
; AVX-NEXT:    vpextrb $11, %xmm1, 11(%rdi)
; AVX-NEXT:  LBB56_24: ## %else22
; AVX-NEXT:    vpextrb $12, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_26
; AVX-NEXT:  ## BB#25: ## %cond.store23
; AVX-NEXT:    vpextrb $12, %xmm1, 12(%rdi)
; AVX-NEXT:  LBB56_26: ## %else24
; AVX-NEXT:    vpextrb $13, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_28
; AVX-NEXT:  ## BB#27: ## %cond.store25
; AVX-NEXT:    vpextrb $13, %xmm1, 13(%rdi)
; AVX-NEXT:  LBB56_28: ## %else26
; AVX-NEXT:    vpextrb $14, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_30
; AVX-NEXT:  ## BB#29: ## %cond.store27
; AVX-NEXT:    vpextrb $14, %xmm1, 14(%rdi)
; AVX-NEXT:  LBB56_30: ## %else28
; AVX-NEXT:    vpextrb $15, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB56_32
; AVX-NEXT:  ## BB#31: ## %cond.store29
; AVX-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
; AVX-NEXT:  LBB56_32: ## %else30
; AVX-NEXT:    retq
;
; AVX512F-LABEL: test_mask_store_16xi8:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_2
; AVX512F-NEXT:  ## BB#1: ## %cond.store
; AVX512F-NEXT:    vpextrb $0, %xmm1, (%rdi)
; AVX512F-NEXT:  LBB56_2: ## %else
; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_4
; AVX512F-NEXT:  ## BB#3: ## %cond.store1
; AVX512F-NEXT:    vpextrb $1, %xmm1, 1(%rdi)
; AVX512F-NEXT:  LBB56_4: ## %else2
; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_6
; AVX512F-NEXT:  ## BB#5: ## %cond.store3
; AVX512F-NEXT:    vpextrb $2, %xmm1, 2(%rdi)
; AVX512F-NEXT:  LBB56_6: ## %else4
; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_8
; AVX512F-NEXT:  ## BB#7: ## %cond.store5
; AVX512F-NEXT:    vpextrb $3, %xmm1, 3(%rdi)
; AVX512F-NEXT:  LBB56_8: ## %else6
; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_10
; AVX512F-NEXT:  ## BB#9: ## %cond.store7
; AVX512F-NEXT:    vpextrb $4, %xmm1, 4(%rdi)
; AVX512F-NEXT:  LBB56_10: ## %else8
; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_12
; AVX512F-NEXT:  ## BB#11: ## %cond.store9
; AVX512F-NEXT:    vpextrb $5, %xmm1, 5(%rdi)
; AVX512F-NEXT:  LBB56_12: ## %else10
; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_14
; AVX512F-NEXT:  ## BB#13: ## %cond.store11
; AVX512F-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
; AVX512F-NEXT:  LBB56_14: ## %else12
; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_16
; AVX512F-NEXT:  ## BB#15: ## %cond.store13
; AVX512F-NEXT:    vpextrb $7, %xmm1, 7(%rdi)
; AVX512F-NEXT:  LBB56_16: ## %else14
; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_18
; AVX512F-NEXT:  ## BB#17: ## %cond.store15
; AVX512F-NEXT:    vpextrb $8, %xmm1, 8(%rdi)
; AVX512F-NEXT:  LBB56_18: ## %else16
; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_20
; AVX512F-NEXT:  ## BB#19: ## %cond.store17
; AVX512F-NEXT:    vpextrb $9, %xmm1, 9(%rdi)
; AVX512F-NEXT:  LBB56_20: ## %else18
; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_22
; AVX512F-NEXT:  ## BB#21: ## %cond.store19
; AVX512F-NEXT:    vpextrb $10, %xmm1, 10(%rdi)
; AVX512F-NEXT:  LBB56_22: ## %else20
; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_24
; AVX512F-NEXT:  ## BB#23: ## %cond.store21
; AVX512F-NEXT:    vpextrb $11, %xmm1, 11(%rdi)
; AVX512F-NEXT:  LBB56_24: ## %else22
; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_26
; AVX512F-NEXT:  ## BB#25: ## %cond.store23
; AVX512F-NEXT:    vpextrb $12, %xmm1, 12(%rdi)
; AVX512F-NEXT:  LBB56_26: ## %else24
; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_28
; AVX512F-NEXT:  ## BB#27: ## %cond.store25
; AVX512F-NEXT:    vpextrb $13, %xmm1, 13(%rdi)
; AVX512F-NEXT:  LBB56_28: ## %else26
; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_30
; AVX512F-NEXT:  ## BB#29: ## %cond.store27
; AVX512F-NEXT:    vpextrb $14, %xmm1, 14(%rdi)
; AVX512F-NEXT:  LBB56_30: ## %else28
; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB56_32
; AVX512F-NEXT:  ## BB#31: ## %cond.store29
; AVX512F-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
; AVX512F-NEXT:  LBB56_32: ## %else30
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_store_16xi8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vmovdqu8 %xmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)

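; The <32 x i8> store repeats the per-lane pattern across both 128-bit halves:
; after the low sixteen bytes, vextractf128 exposes the upper half of the mask
; and of the data so the same extract/test/store sequence can handle bytes
; 16-31.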
define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; AVX1-LABEL: test_mask_store_32xi8:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_2
; AVX1-NEXT:  ## BB#1: ## %cond.store
; AVX1-NEXT:    vpextrb $0, %xmm1, (%rdi)
; AVX1-NEXT:  LBB57_2: ## %else
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_4
; AVX1-NEXT:  ## BB#3: ## %cond.store1
; AVX1-NEXT:    vpextrb $1, %xmm1, 1(%rdi)
; AVX1-NEXT:  LBB57_4: ## %else2
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_6
; AVX1-NEXT:  ## BB#5: ## %cond.store3
; AVX1-NEXT:    vpextrb $2, %xmm1, 2(%rdi)
; AVX1-NEXT:  LBB57_6: ## %else4
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_8
; AVX1-NEXT:  ## BB#7: ## %cond.store5
; AVX1-NEXT:    vpextrb $3, %xmm1, 3(%rdi)
; AVX1-NEXT:  LBB57_8: ## %else6
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_10
; AVX1-NEXT:  ## BB#9: ## %cond.store7
; AVX1-NEXT:    vpextrb $4, %xmm1, 4(%rdi)
; AVX1-NEXT:  LBB57_10: ## %else8
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_12
; AVX1-NEXT:  ## BB#11: ## %cond.store9
; AVX1-NEXT:    vpextrb $5, %xmm1, 5(%rdi)
; AVX1-NEXT:  LBB57_12: ## %else10
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_14
; AVX1-NEXT:  ## BB#13: ## %cond.store11
; AVX1-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
; AVX1-NEXT:  LBB57_14: ## %else12
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_16
; AVX1-NEXT:  ## BB#15: ## %cond.store13
; AVX1-NEXT:    vpextrb $7, %xmm1, 7(%rdi)
; AVX1-NEXT:  LBB57_16: ## %else14
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_18
; AVX1-NEXT:  ## BB#17: ## %cond.store15
; AVX1-NEXT:    vpextrb $8, %xmm1, 8(%rdi)
; AVX1-NEXT:  LBB57_18: ## %else16
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_20
; AVX1-NEXT:  ## BB#19: ## %cond.store17
; AVX1-NEXT:    vpextrb $9, %xmm1, 9(%rdi)
; AVX1-NEXT:  LBB57_20: ## %else18
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_22
; AVX1-NEXT:  ## BB#21: ## %cond.store19
; AVX1-NEXT:    vpextrb $10, %xmm1, 10(%rdi)
; AVX1-NEXT:  LBB57_22: ## %else20
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_24
; AVX1-NEXT:  ## BB#23: ## %cond.store21
; AVX1-NEXT:    vpextrb $11, %xmm1, 11(%rdi)
; AVX1-NEXT:  LBB57_24: ## %else22
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_26
; AVX1-NEXT:  ## BB#25: ## %cond.store23
; AVX1-NEXT:    vpextrb $12, %xmm1, 12(%rdi)
; AVX1-NEXT:  LBB57_26: ## %else24
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_28
; AVX1-NEXT:  ## BB#27: ## %cond.store25
; AVX1-NEXT:    vpextrb $13, %xmm1, 13(%rdi)
; AVX1-NEXT:  LBB57_28: ## %else26
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_30
; AVX1-NEXT:  ## BB#29: ## %cond.store27
; AVX1-NEXT:    vpextrb $14, %xmm1, 14(%rdi)
; AVX1-NEXT:  LBB57_30: ## %else28
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_32
; AVX1-NEXT:  ## BB#31: ## %cond.store29
; AVX1-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
; AVX1-NEXT:  LBB57_32: ## %else30
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_34
; AVX1-NEXT:  ## BB#33: ## %cond.store31
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $0, %xmm2, 16(%rdi)
; AVX1-NEXT:  LBB57_34: ## %else32
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_36
; AVX1-NEXT:  ## BB#35: ## %cond.store33
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
; AVX1-NEXT:  LBB57_36: ## %else34
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_38
; AVX1-NEXT:  ## BB#37: ## %cond.store35
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $2, %xmm2, 18(%rdi)
; AVX1-NEXT:  LBB57_38: ## %else36
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_40
; AVX1-NEXT:  ## BB#39: ## %cond.store37
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
; AVX1-NEXT:  LBB57_40: ## %else38
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_42
; AVX1-NEXT:  ## BB#41: ## %cond.store39
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $4, %xmm2, 20(%rdi)
; AVX1-NEXT:  LBB57_42: ## %else40
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_44
; AVX1-NEXT:  ## BB#43: ## %cond.store41
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
; AVX1-NEXT:  LBB57_44: ## %else42
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_46
; AVX1-NEXT:  ## BB#45: ## %cond.store43
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $6, %xmm2, 22(%rdi)
; AVX1-NEXT:  LBB57_46: ## %else44
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_48
; AVX1-NEXT:  ## BB#47: ## %cond.store45
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
; AVX1-NEXT:  LBB57_48: ## %else46
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_50
; AVX1-NEXT:  ## BB#49: ## %cond.store47
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $8, %xmm2, 24(%rdi)
; AVX1-NEXT:  LBB57_50: ## %else48
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_52
; AVX1-NEXT:  ## BB#51: ## %cond.store49
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
; AVX1-NEXT:  LBB57_52: ## %else50
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_54
; AVX1-NEXT:  ## BB#53: ## %cond.store51
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $10, %xmm2, 26(%rdi)
; AVX1-NEXT:  LBB57_54: ## %else52
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_56
; AVX1-NEXT:  ## BB#55: ## %cond.store53
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
; AVX1-NEXT:  LBB57_56: ## %else54
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_58
; AVX1-NEXT:  ## BB#57: ## %cond.store55
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $12, %xmm2, 28(%rdi)
; AVX1-NEXT:  LBB57_58: ## %else56
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_60
; AVX1-NEXT:  ## BB#59: ## %cond.store57
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
; AVX1-NEXT:  LBB57_60: ## %else58
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_62
; AVX1-NEXT:  ## BB#61: ## %cond.store59
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
; AVX1-NEXT:  LBB57_62: ## %else60
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_64
; AVX1-NEXT:  ## BB#63: ## %cond.store61
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
; AVX1-NEXT:  LBB57_64: ## %else62
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_store_32xi8:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_2
; AVX2-NEXT:  ## BB#1: ## %cond.store
; AVX2-NEXT:    vpextrb $0, %xmm1, (%rdi)
; AVX2-NEXT:  LBB57_2: ## %else
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_4
; AVX2-NEXT:  ## BB#3: ## %cond.store1
; AVX2-NEXT:    vpextrb $1, %xmm1, 1(%rdi)
; AVX2-NEXT:  LBB57_4: ## %else2
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_6
; AVX2-NEXT:  ## BB#5: ## %cond.store3
; AVX2-NEXT:    vpextrb $2, %xmm1, 2(%rdi)
; AVX2-NEXT:  LBB57_6: ## %else4
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_8
; AVX2-NEXT:  ## BB#7: ## %cond.store5
; AVX2-NEXT:    vpextrb $3, %xmm1, 3(%rdi)
; AVX2-NEXT:  LBB57_8: ## %else6
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_10
; AVX2-NEXT:  ## BB#9: ## %cond.store7
; AVX2-NEXT:    vpextrb $4, %xmm1, 4(%rdi)
; AVX2-NEXT:  LBB57_10: ## %else8
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_12
; AVX2-NEXT:  ## BB#11: ## %cond.store9
; AVX2-NEXT:    vpextrb $5, %xmm1, 5(%rdi)
; AVX2-NEXT:  LBB57_12: ## %else10
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_14
; AVX2-NEXT:  ## BB#13: ## %cond.store11
; AVX2-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
; AVX2-NEXT:  LBB57_14: ## %else12
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_16
; AVX2-NEXT:  ## BB#15: ## %cond.store13
; AVX2-NEXT:    vpextrb $7, %xmm1, 7(%rdi)
; AVX2-NEXT:  LBB57_16: ## %else14
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_18
; AVX2-NEXT:  ## BB#17: ## %cond.store15
; AVX2-NEXT:    vpextrb $8, %xmm1, 8(%rdi)
; AVX2-NEXT:  LBB57_18: ## %else16
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_20
; AVX2-NEXT:  ## BB#19: ## %cond.store17
; AVX2-NEXT:    vpextrb $9, %xmm1, 9(%rdi)
; AVX2-NEXT:  LBB57_20: ## %else18
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_22
; AVX2-NEXT:  ## BB#21: ## %cond.store19
; AVX2-NEXT:    vpextrb $10, %xmm1, 10(%rdi)
; AVX2-NEXT:  LBB57_22: ## %else20
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_24
; AVX2-NEXT:  ## BB#23: ## %cond.store21
; AVX2-NEXT:    vpextrb $11, %xmm1, 11(%rdi)
; AVX2-NEXT:  LBB57_24: ## %else22
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_26
; AVX2-NEXT:  ## BB#25: ## %cond.store23
; AVX2-NEXT:    vpextrb $12, %xmm1, 12(%rdi)
; AVX2-NEXT:  LBB57_26: ## %else24
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_28
; AVX2-NEXT:  ## BB#27: ## %cond.store25
; AVX2-NEXT:    vpextrb $13, %xmm1, 13(%rdi)
; AVX2-NEXT:  LBB57_28: ## %else26
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_30
; AVX2-NEXT:  ## BB#29: ## %cond.store27
; AVX2-NEXT:    vpextrb $14, %xmm1, 14(%rdi)
; AVX2-NEXT:  LBB57_30: ## %else28
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_32
; AVX2-NEXT:  ## BB#31: ## %cond.store29
; AVX2-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
; AVX2-NEXT:  LBB57_32: ## %else30
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_34
; AVX2-NEXT:  ## BB#33: ## %cond.store31
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $0, %xmm2, 16(%rdi)
; AVX2-NEXT:  LBB57_34: ## %else32
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_36
; AVX2-NEXT:  ## BB#35: ## %cond.store33
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
; AVX2-NEXT:  LBB57_36: ## %else34
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_38
; AVX2-NEXT:  ## BB#37: ## %cond.store35
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $2, %xmm2, 18(%rdi)
; AVX2-NEXT:  LBB57_38: ## %else36
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_40
; AVX2-NEXT:  ## BB#39: ## %cond.store37
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
; AVX2-NEXT:  LBB57_40: ## %else38
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_42
; AVX2-NEXT:  ## BB#41: ## %cond.store39
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $4, %xmm2, 20(%rdi)
; AVX2-NEXT:  LBB57_42: ## %else40
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_44
; AVX2-NEXT:  ## BB#43: ## %cond.store41
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
; AVX2-NEXT:  LBB57_44: ## %else42
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_46
; AVX2-NEXT:  ## BB#45: ## %cond.store43
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $6, %xmm2, 22(%rdi)
; AVX2-NEXT:  LBB57_46: ## %else44
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_48
; AVX2-NEXT:  ## BB#47: ## %cond.store45
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
; AVX2-NEXT:  LBB57_48: ## %else46
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_50
; AVX2-NEXT:  ## BB#49: ## %cond.store47
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $8, %xmm2, 24(%rdi)
; AVX2-NEXT:  LBB57_50: ## %else48
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_52
; AVX2-NEXT:  ## BB#51: ## %cond.store49
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
; AVX2-NEXT:  LBB57_52: ## %else50
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_54
; AVX2-NEXT:  ## BB#53: ## %cond.store51
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $10, %xmm2, 26(%rdi)
; AVX2-NEXT:  LBB57_54: ## %else52
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_56
; AVX2-NEXT:  ## BB#55: ## %cond.store53
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
; AVX2-NEXT:  LBB57_56: ## %else54
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_58
; AVX2-NEXT:  ## BB#57: ## %cond.store55
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $12, %xmm2, 28(%rdi)
; AVX2-NEXT:  LBB57_58: ## %else56
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_60
; AVX2-NEXT:  ## BB#59: ## %cond.store57
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
; AVX2-NEXT:  LBB57_60: ## %else58
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_62
; AVX2-NEXT:  ## BB#61: ## %cond.store59
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
; AVX2-NEXT:  LBB57_62: ## %else60
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_64
; AVX2-NEXT:  ## BB#63: ## %cond.store61
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
; AVX2-NEXT:  LBB57_64: ## %else62
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_store_32xi8:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_2
; AVX512F-NEXT:  ## BB#1: ## %cond.store
; AVX512F-NEXT:    vpextrb $0, %xmm1, (%rdi)
; AVX512F-NEXT:  LBB57_2: ## %else
; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_4
; AVX512F-NEXT:  ## BB#3: ## %cond.store1
; AVX512F-NEXT:    vpextrb $1, %xmm1, 1(%rdi)
; AVX512F-NEXT:  LBB57_4: ## %else2
; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_6
; AVX512F-NEXT:  ## BB#5: ## %cond.store3
; AVX512F-NEXT:    vpextrb $2, %xmm1, 2(%rdi)
; AVX512F-NEXT:  LBB57_6: ## %else4
; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_8
; AVX512F-NEXT:  ## BB#7: ## %cond.store5
; AVX512F-NEXT:    vpextrb $3, %xmm1, 3(%rdi)
; AVX512F-NEXT:  LBB57_8: ## %else6
; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_10
; AVX512F-NEXT:  ## BB#9: ## %cond.store7
; AVX512F-NEXT:    vpextrb $4, %xmm1, 4(%rdi)
; AVX512F-NEXT:  LBB57_10: ## %else8
; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_12
; AVX512F-NEXT:  ## BB#11: ## %cond.store9
; AVX512F-NEXT:    vpextrb $5, %xmm1, 5(%rdi)
; AVX512F-NEXT:  LBB57_12: ## %else10
; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_14
; AVX512F-NEXT:  ## BB#13: ## %cond.store11
; AVX512F-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
; AVX512F-NEXT:  LBB57_14: ## %else12
; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_16
; AVX512F-NEXT:  ## BB#15: ## %cond.store13
; AVX512F-NEXT:    vpextrb $7, %xmm1, 7(%rdi)
; AVX512F-NEXT:  LBB57_16: ## %else14
; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_18
; AVX512F-NEXT:  ## BB#17: ## %cond.store15
; AVX512F-NEXT:    vpextrb $8, %xmm1, 8(%rdi)
; AVX512F-NEXT:  LBB57_18: ## %else16
; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_20
; AVX512F-NEXT:  ## BB#19: ## %cond.store17
; AVX512F-NEXT:    vpextrb $9, %xmm1, 9(%rdi)
; AVX512F-NEXT:  LBB57_20: ## %else18
; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_22
; AVX512F-NEXT:  ## BB#21: ## %cond.store19
; AVX512F-NEXT:    vpextrb $10, %xmm1, 10(%rdi)
; AVX512F-NEXT:  LBB57_22: ## %else20
; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_24
; AVX512F-NEXT:  ## BB#23: ## %cond.store21
; AVX512F-NEXT:    vpextrb $11, %xmm1, 11(%rdi)
; AVX512F-NEXT:  LBB57_24: ## %else22
; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_26
; AVX512F-NEXT:  ## BB#25: ## %cond.store23
; AVX512F-NEXT:    vpextrb $12, %xmm1, 12(%rdi)
; AVX512F-NEXT:  LBB57_26: ## %else24
; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_28
; AVX512F-NEXT:  ## BB#27: ## %cond.store25
; AVX512F-NEXT:    vpextrb $13, %xmm1, 13(%rdi)
; AVX512F-NEXT:  LBB57_28: ## %else26
; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_30
; AVX512F-NEXT:  ## BB#29: ## %cond.store27
; AVX512F-NEXT:    vpextrb $14, %xmm1, 14(%rdi)
; AVX512F-NEXT:  LBB57_30: ## %else28
; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_32
; AVX512F-NEXT:  ## BB#31: ## %cond.store29
; AVX512F-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
; AVX512F-NEXT:  LBB57_32: ## %else30
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_34
; AVX512F-NEXT:  ## BB#33: ## %cond.store31
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $0, %xmm2, 16(%rdi)
; AVX512F-NEXT:  LBB57_34: ## %else32
; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_36
; AVX512F-NEXT:  ## BB#35: ## %cond.store33
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
; AVX512F-NEXT:  LBB57_36: ## %else34
; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_38
; AVX512F-NEXT:  ## BB#37: ## %cond.store35
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $2, %xmm2, 18(%rdi)
; AVX512F-NEXT:  LBB57_38: ## %else36
; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_40
; AVX512F-NEXT:  ## BB#39: ## %cond.store37
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
; AVX512F-NEXT:  LBB57_40: ## %else38
; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_42
; AVX512F-NEXT:  ## BB#41: ## %cond.store39
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $4, %xmm2, 20(%rdi)
; AVX512F-NEXT:  LBB57_42: ## %else40
; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_44
; AVX512F-NEXT:  ## BB#43: ## %cond.store41
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
; AVX512F-NEXT:  LBB57_44: ## %else42
; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_46
; AVX512F-NEXT:  ## BB#45: ## %cond.store43
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $6, %xmm2, 22(%rdi)
; AVX512F-NEXT:  LBB57_46: ## %else44
; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_48
; AVX512F-NEXT:  ## BB#47: ## %cond.store45
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
; AVX512F-NEXT:  LBB57_48: ## %else46
; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_50
; AVX512F-NEXT:  ## BB#49: ## %cond.store47
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $8, %xmm2, 24(%rdi)
; AVX512F-NEXT:  LBB57_50: ## %else48
; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_52
; AVX512F-NEXT:  ## BB#51: ## %cond.store49
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
; AVX512F-NEXT:  LBB57_52: ## %else50
; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_54
; AVX512F-NEXT:  ## BB#53: ## %cond.store51
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $10, %xmm2, 26(%rdi)
; AVX512F-NEXT:  LBB57_54: ## %else52
; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_56
; AVX512F-NEXT:  ## BB#55: ## %cond.store53
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
; AVX512F-NEXT:  LBB57_56: ## %else54
; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_58
; AVX512F-NEXT:  ## BB#57: ## %cond.store55
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $12, %xmm2, 28(%rdi)
; AVX512F-NEXT:  LBB57_58: ## %else56
; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_60
; AVX512F-NEXT:  ## BB#59: ## %cond.store57
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
; AVX512F-NEXT:  LBB57_60: ## %else58
; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_62
; AVX512F-NEXT:  ## BB#61: ## %cond.store59
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
; AVX512F-NEXT:  LBB57_62: ## %else60
; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_64
; AVX512F-NEXT:  ## BB#63: ## %cond.store61
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
; AVX512F-NEXT:  LBB57_64: ## %else62
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_store_32xi8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k1
; SKX-NEXT:    vmovdqu8 %ymm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
  ret void
}

declare void @llvm.masked.store.v32i8.p0v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)

define void @test_mask_store_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
; AVX1-LABEL: test_mask_store_64xi8:
; AVX1:       ## BB#0:
; AVX1-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    testb $1, %dil
; AVX1-NEXT:    je LBB58_2
; AVX1-NEXT:  ## BB#1: ## %cond.store
; AVX1-NEXT:    vpextrb $0, %xmm0, (%rax)
; AVX1-NEXT:  LBB58_2: ## %else
; AVX1-NEXT:    testb $1, %sil
; AVX1-NEXT:    je LBB58_4
; AVX1-NEXT:  ## BB#3: ## %cond.store1
; AVX1-NEXT:    vpextrb $1, %xmm0, 1(%rax)
; AVX1-NEXT:  LBB58_4: ## %else2
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_6
; AVX1-NEXT:  ## BB#5: ## %cond.store3
; AVX1-NEXT:    vpextrb $2, %xmm0, 2(%rax)
; AVX1-NEXT:  LBB58_6: ## %else4
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_8
; AVX1-NEXT:  ## BB#7: ## %cond.store5
; AVX1-NEXT:    vpextrb $3, %xmm0, 3(%rax)
; AVX1-NEXT:  LBB58_8: ## %else6
; AVX1-NEXT:    testb $1, %r8b
; AVX1-NEXT:    je LBB58_10
; AVX1-NEXT:  ## BB#9: ## %cond.store7
; AVX1-NEXT:    vpextrb $4, %xmm0, 4(%rax)
; AVX1-NEXT:  LBB58_10: ## %else8
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %r9b
; AVX1-NEXT:    je LBB58_12
; AVX1-NEXT:  ## BB#11: ## %cond.store9
; AVX1-NEXT:    vpextrb $5, %xmm0, 5(%rax)
; AVX1-NEXT:  LBB58_12: ## %else10
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_14
; AVX1-NEXT:  ## BB#13: ## %cond.store11
; AVX1-NEXT:    vpextrb $6, %xmm0, 6(%rax)
; AVX1-NEXT:  LBB58_14: ## %else12
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_16
; AVX1-NEXT:  ## BB#15: ## %cond.store13
; AVX1-NEXT:    vpextrb $7, %xmm0, 7(%rax)
; AVX1-NEXT:  LBB58_16: ## %else14
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_18
; AVX1-NEXT:  ## BB#17: ## %cond.store15
; AVX1-NEXT:    vpextrb $8, %xmm0, 8(%rax)
; AVX1-NEXT:  LBB58_18: ## %else16
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_20
; AVX1-NEXT:  ## BB#19: ## %cond.store17
; AVX1-NEXT:    vpextrb $9, %xmm0, 9(%rax)
; AVX1-NEXT:  LBB58_20: ## %else18
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_22
; AVX1-NEXT:  ## BB#21: ## %cond.store19
; AVX1-NEXT:    vpextrb $10, %xmm0, 10(%rax)
; AVX1-NEXT:  LBB58_22: ## %else20
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_24
; AVX1-NEXT:  ## BB#23: ## %cond.store21
; AVX1-NEXT:    vpextrb $11, %xmm0, 11(%rax)
; AVX1-NEXT:  LBB58_24: ## %else22
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_26
; AVX1-NEXT:  ## BB#25: ## %cond.store23
; AVX1-NEXT:    vpextrb $12, %xmm0, 12(%rax)
; AVX1-NEXT:  LBB58_26: ## %else24
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_28
; AVX1-NEXT:  ## BB#27: ## %cond.store25
; AVX1-NEXT:    vpextrb $13, %xmm0, 13(%rax)
; AVX1-NEXT:  LBB58_28: ## %else26
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_30
; AVX1-NEXT:  ## BB#29: ## %cond.store27
; AVX1-NEXT:    vpextrb $14, %xmm0, 14(%rax)
; AVX1-NEXT:  LBB58_30: ## %else28
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_32
; AVX1-NEXT:  ## BB#31: ## %cond.store29
; AVX1-NEXT:    vpextrb $15, %xmm0, 15(%rax)
; AVX1-NEXT:  LBB58_32: ## %else30
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_34
; AVX1-NEXT:  ## BB#33: ## %cond.store31
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $0, %xmm2, 16(%rax)
; AVX1-NEXT:  LBB58_34: ## %else32
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_36
; AVX1-NEXT:  ## BB#35: ## %cond.store33
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $1, %xmm2, 17(%rax)
; AVX1-NEXT:  LBB58_36: ## %else34
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_38
; AVX1-NEXT:  ## BB#37: ## %cond.store35
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $2, %xmm2, 18(%rax)
; AVX1-NEXT:  LBB58_38: ## %else36
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_40
; AVX1-NEXT:  ## BB#39: ## %cond.store37
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $3, %xmm2, 19(%rax)
; AVX1-NEXT:  LBB58_40: ## %else38
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_42
; AVX1-NEXT:  ## BB#41: ## %cond.store39
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $4, %xmm2, 20(%rax)
; AVX1-NEXT:  LBB58_42: ## %else40
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_44
; AVX1-NEXT:  ## BB#43: ## %cond.store41
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $5, %xmm2, 21(%rax)
; AVX1-NEXT:  LBB58_44: ## %else42
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_46
; AVX1-NEXT:  ## BB#45: ## %cond.store43
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $6, %xmm2, 22(%rax)
; AVX1-NEXT:  LBB58_46: ## %else44
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_48
; AVX1-NEXT:  ## BB#47: ## %cond.store45
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $7, %xmm2, 23(%rax)
; AVX1-NEXT:  LBB58_48: ## %else46
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_50
; AVX1-NEXT:  ## BB#49: ## %cond.store47
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $8, %xmm2, 24(%rax)
; AVX1-NEXT:  LBB58_50: ## %else48
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_52
; AVX1-NEXT:  ## BB#51: ## %cond.store49
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $9, %xmm2, 25(%rax)
; AVX1-NEXT:  LBB58_52: ## %else50
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_54
; AVX1-NEXT:  ## BB#53: ## %cond.store51
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $10, %xmm2, 26(%rax)
; AVX1-NEXT:  LBB58_54: ## %else52
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_56
; AVX1-NEXT:  ## BB#55: ## %cond.store53
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $11, %xmm2, 27(%rax)
; AVX1-NEXT:  LBB58_56: ## %else54
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_58
; AVX1-NEXT:  ## BB#57: ## %cond.store55
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $12, %xmm2, 28(%rax)
; AVX1-NEXT:  LBB58_58: ## %else56
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_60
; AVX1-NEXT:  ## BB#59: ## %cond.store57
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $13, %xmm2, 29(%rax)
; AVX1-NEXT:  LBB58_60: ## %else58
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_62
; AVX1-NEXT:  ## BB#61: ## %cond.store59
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $14, %xmm2, 30(%rax)
; AVX1-NEXT:  LBB58_62: ## %else60
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_64
; AVX1-NEXT:  ## BB#63: ## %cond.store61
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpextrb $15, %xmm0, 31(%rax)
; AVX1-NEXT:  LBB58_64: ## %else62
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_66
; AVX1-NEXT:  ## BB#65: ## %cond.store63
; AVX1-NEXT:    vpextrb $0, %xmm1, 32(%rax)
; AVX1-NEXT:  LBB58_66: ## %else64
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_68
; AVX1-NEXT:  ## BB#67: ## %cond.store65
; AVX1-NEXT:    vpextrb $1, %xmm1, 33(%rax)
; AVX1-NEXT:  LBB58_68: ## %else66
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_70
; AVX1-NEXT:  ## BB#69: ## %cond.store67
; AVX1-NEXT:    vpextrb $2, %xmm1, 34(%rax)
; AVX1-NEXT:  LBB58_70: ## %else68
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_72
; AVX1-NEXT:  ## BB#71: ## %cond.store69
; AVX1-NEXT:    vpextrb $3, %xmm1, 35(%rax)
; AVX1-NEXT:  LBB58_72: ## %else70
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_74
; AVX1-NEXT:  ## BB#73: ## %cond.store71
; AVX1-NEXT:    vpextrb $4, %xmm1, 36(%rax)
; AVX1-NEXT:  LBB58_74: ## %else72
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_76
; AVX1-NEXT:  ## BB#75: ## %cond.store73
; AVX1-NEXT:    vpextrb $5, %xmm1, 37(%rax)
; AVX1-NEXT:  LBB58_76: ## %else74
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_78
; AVX1-NEXT:  ## BB#77: ## %cond.store75
; AVX1-NEXT:    vpextrb $6, %xmm1, 38(%rax)
; AVX1-NEXT:  LBB58_78: ## %else76
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_80
; AVX1-NEXT:  ## BB#79: ## %cond.store77
; AVX1-NEXT:    vpextrb $7, %xmm1, 39(%rax)
; AVX1-NEXT:  LBB58_80: ## %else78
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_82
; AVX1-NEXT:  ## BB#81: ## %cond.store79
; AVX1-NEXT:    vpextrb $8, %xmm1, 40(%rax)
; AVX1-NEXT:  LBB58_82: ## %else80
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_84
; AVX1-NEXT:  ## BB#83: ## %cond.store81
; AVX1-NEXT:    vpextrb $9, %xmm1, 41(%rax)
; AVX1-NEXT:  LBB58_84: ## %else82
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_86
; AVX1-NEXT:  ## BB#85: ## %cond.store83
; AVX1-NEXT:    vpextrb $10, %xmm1, 42(%rax)
; AVX1-NEXT:  LBB58_86: ## %else84
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_88
; AVX1-NEXT:  ## BB#87: ## %cond.store85
; AVX1-NEXT:    vpextrb $11, %xmm1, 43(%rax)
; AVX1-NEXT:  LBB58_88: ## %else86
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_90
; AVX1-NEXT:  ## BB#89: ## %cond.store87
; AVX1-NEXT:    vpextrb $12, %xmm1, 44(%rax)
; AVX1-NEXT:  LBB58_90: ## %else88
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_92
; AVX1-NEXT:  ## BB#91: ## %cond.store89
; AVX1-NEXT:    vpextrb $13, %xmm1, 45(%rax)
; AVX1-NEXT:  LBB58_92: ## %else90
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_94
; AVX1-NEXT:  ## BB#93: ## %cond.store91
; AVX1-NEXT:    vpextrb $14, %xmm1, 46(%rax)
; AVX1-NEXT:  LBB58_94: ## %else92
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_96
; AVX1-NEXT:  ## BB#95: ## %cond.store93
; AVX1-NEXT:    vpextrb $15, %xmm1, 47(%rax)
; AVX1-NEXT:  LBB58_96: ## %else94
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_98
; AVX1-NEXT:  ## BB#97: ## %cond.store95
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, 48(%rax)
; AVX1-NEXT:  LBB58_98: ## %else96
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_100
; AVX1-NEXT:  ## BB#99: ## %cond.store97
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $1, %xmm0, 49(%rax)
; AVX1-NEXT:  LBB58_100: ## %else98
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_102
; AVX1-NEXT:  ## BB#101: ## %cond.store99
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $2, %xmm0, 50(%rax)
; AVX1-NEXT:  LBB58_102: ## %else100
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_104
; AVX1-NEXT:  ## BB#103: ## %cond.store101
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $3, %xmm0, 51(%rax)
; AVX1-NEXT:  LBB58_104: ## %else102
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_106
; AVX1-NEXT:  ## BB#105: ## %cond.store103
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $4, %xmm0, 52(%rax)
; AVX1-NEXT:  LBB58_106: ## %else104
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_108
; AVX1-NEXT:  ## BB#107: ## %cond.store105
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $5, %xmm0, 53(%rax)
; AVX1-NEXT:  LBB58_108: ## %else106
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_110
; AVX1-NEXT:  ## BB#109: ## %cond.store107
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $6, %xmm0, 54(%rax)
; AVX1-NEXT:  LBB58_110: ## %else108
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_112
; AVX1-NEXT:  ## BB#111: ## %cond.store109
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $7, %xmm0, 55(%rax)
; AVX1-NEXT:  LBB58_112: ## %else110
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_114
; AVX1-NEXT:  ## BB#113: ## %cond.store111
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $8, %xmm0, 56(%rax)
; AVX1-NEXT:  LBB58_114: ## %else112
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_116
; AVX1-NEXT:  ## BB#115: ## %cond.store113
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $9, %xmm0, 57(%rax)
; AVX1-NEXT:  LBB58_116: ## %else114
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_118
; AVX1-NEXT:  ## BB#117: ## %cond.store115
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $10, %xmm0, 58(%rax)
; AVX1-NEXT:  LBB58_118: ## %else116
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_120
; AVX1-NEXT:  ## BB#119: ## %cond.store117
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $11, %xmm0, 59(%rax)
; AVX1-NEXT:  LBB58_120: ## %else118
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_122
; AVX1-NEXT:  ## BB#121: ## %cond.store119
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $12, %xmm0, 60(%rax)
; AVX1-NEXT:  LBB58_122: ## %else120
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_124
; AVX1-NEXT:  ## BB#123: ## %cond.store121
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $13, %xmm0, 61(%rax)
; AVX1-NEXT:  LBB58_124: ## %else122
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_126
; AVX1-NEXT:  ## BB#125: ## %cond.store123
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $14, %xmm0, 62(%rax)
; AVX1-NEXT:  LBB58_126: ## %else124
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_128
; AVX1-NEXT:  ## BB#127: ## %cond.store125
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $15, %xmm0, 63(%rax)
; AVX1-NEXT:  LBB58_128: ## %else126
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_store_64xi8:
 | 
						|
; AVX2:       ## BB#0:
 | 
						|
; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 | 
						|
; AVX2-NEXT:    testb $1, %dil
 | 
						|
; AVX2-NEXT:    je LBB58_2
 | 
						|
; AVX2-NEXT:  ## BB#1: ## %cond.store
 | 
						|
; AVX2-NEXT:    vpextrb $0, %xmm0, (%rax)
 | 
						|
; AVX2-NEXT:  LBB58_2: ## %else
 | 
						|
; AVX2-NEXT:    testb $1, %sil
 | 
						|
; AVX2-NEXT:    je LBB58_4
 | 
						|
; AVX2-NEXT:  ## BB#3: ## %cond.store1
 | 
						|
; AVX2-NEXT:    vpextrb $1, %xmm0, 1(%rax)
 | 
						|
; AVX2-NEXT:  LBB58_4: ## %else2
 | 
						|
; AVX2-NEXT:    testb $1, %dl
 | 
						|
; AVX2-NEXT:    je LBB58_6
 | 
						|
; AVX2-NEXT:  ## BB#5: ## %cond.store3
 | 
						|
; AVX2-NEXT:    vpextrb $2, %xmm0, 2(%rax)
 | 
						|
; AVX2-NEXT:  LBB58_6: ## %else4
 | 
						|
; AVX2-NEXT:    testb $1, %cl
 | 
						|
; AVX2-NEXT:    je LBB58_8
 | 
						|
; AVX2-NEXT:  ## BB#7: ## %cond.store5
 | 
						|
; AVX2-NEXT:    vpextrb $3, %xmm0, 3(%rax)
 | 
						|
; AVX2-NEXT:  LBB58_8: ## %else6
 | 
						|
; AVX2-NEXT:    testb $1, %r8b
 | 
						|
; AVX2-NEXT:    je LBB58_10
 | 
						|
; AVX2-NEXT:  ## BB#9: ## %cond.store7
 | 
						|
; AVX2-NEXT:    vpextrb $4, %xmm0, 4(%rax)
 | 
						|
; AVX2-NEXT:  LBB58_10: ## %else8
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 | 
						|
; AVX2-NEXT:    testb $1, %r9b
 | 
						|
; AVX2-NEXT:    je LBB58_12
 | 
						|
; AVX2-NEXT:  ## BB#11: ## %cond.store9
 | 
						|
; AVX2-NEXT:    vpextrb $5, %xmm0, 5(%rax)
 | 
						|
; AVX2-NEXT:  LBB58_12: ## %else10
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
 | 
						|
; AVX2-NEXT:    testb $1, %cl
 | 
						|
; AVX2-NEXT:    je LBB58_14
 | 
						|
; AVX2-NEXT:  ## BB#13: ## %cond.store11
 | 
						|
; AVX2-NEXT:    vpextrb $6, %xmm0, 6(%rax)
 | 
						|
; AVX2-NEXT:  LBB58_14: ## %else12
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 | 
						|
; AVX2-NEXT:    testb $1, %dl
 | 
						|
; AVX2-NEXT:    je LBB58_16
 | 
						|
; AVX2-NEXT:  ## BB#15: ## %cond.store13
 | 
						|
; AVX2-NEXT:    vpextrb $7, %xmm0, 7(%rax)
 | 
						|
; AVX2-NEXT:  LBB58_16: ## %else14
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
 | 
						|
; AVX2-NEXT:    testb $1, %cl
 | 
						|
; AVX2-NEXT:    je LBB58_18
 | 
						|
; AVX2-NEXT:  ## BB#17: ## %cond.store15
 | 
						|
; AVX2-NEXT:    vpextrb $8, %xmm0, 8(%rax)
 | 
						|
; AVX2-NEXT:  LBB58_18: ## %else16
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 | 
						|
; AVX2-NEXT:    testb $1, %dl
 | 
						|
; AVX2-NEXT:    je LBB58_20
 | 
						|
; AVX2-NEXT:  ## BB#19: ## %cond.store17
 | 
						|
; AVX2-NEXT:    vpextrb $9, %xmm0, 9(%rax)
 | 
						|
; AVX2-NEXT:  LBB58_20: ## %else18
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
 | 
						|
; AVX2-NEXT:    testb $1, %cl
 | 
						|
; AVX2-NEXT:    je LBB58_22
 | 
						|
; AVX2-NEXT:  ## BB#21: ## %cond.store19
 | 
						|
; AVX2-NEXT:    vpextrb $10, %xmm0, 10(%rax)
 | 
						|
; AVX2-NEXT:  LBB58_22: ## %else20
 | 
						|
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 | 
						|
; AVX2-NEXT:    testb $1, %dl
 | 
						|
; AVX2-NEXT:    je LBB58_24
 | 
						|
; AVX2-NEXT:  ## BB#23: ## %cond.store21
 | 
						|
; AVX2-NEXT:    vpextrb $11, %xmm0, 11(%rax)
; AVX2-NEXT:  LBB58_24: ## %else22
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_26
; AVX2-NEXT:  ## BB#25: ## %cond.store23
; AVX2-NEXT:    vpextrb $12, %xmm0, 12(%rax)
; AVX2-NEXT:  LBB58_26: ## %else24
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_28
; AVX2-NEXT:  ## BB#27: ## %cond.store25
; AVX2-NEXT:    vpextrb $13, %xmm0, 13(%rax)
; AVX2-NEXT:  LBB58_28: ## %else26
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_30
; AVX2-NEXT:  ## BB#29: ## %cond.store27
; AVX2-NEXT:    vpextrb $14, %xmm0, 14(%rax)
; AVX2-NEXT:  LBB58_30: ## %else28
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_32
; AVX2-NEXT:  ## BB#31: ## %cond.store29
; AVX2-NEXT:    vpextrb $15, %xmm0, 15(%rax)
; AVX2-NEXT:  LBB58_32: ## %else30
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_34
; AVX2-NEXT:  ## BB#33: ## %cond.store31
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $0, %xmm2, 16(%rax)
; AVX2-NEXT:  LBB58_34: ## %else32
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_36
; AVX2-NEXT:  ## BB#35: ## %cond.store33
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $1, %xmm2, 17(%rax)
; AVX2-NEXT:  LBB58_36: ## %else34
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_38
; AVX2-NEXT:  ## BB#37: ## %cond.store35
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $2, %xmm2, 18(%rax)
; AVX2-NEXT:  LBB58_38: ## %else36
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_40
; AVX2-NEXT:  ## BB#39: ## %cond.store37
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $3, %xmm2, 19(%rax)
; AVX2-NEXT:  LBB58_40: ## %else38
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_42
; AVX2-NEXT:  ## BB#41: ## %cond.store39
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $4, %xmm2, 20(%rax)
; AVX2-NEXT:  LBB58_42: ## %else40
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_44
; AVX2-NEXT:  ## BB#43: ## %cond.store41
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $5, %xmm2, 21(%rax)
; AVX2-NEXT:  LBB58_44: ## %else42
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_46
; AVX2-NEXT:  ## BB#45: ## %cond.store43
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $6, %xmm2, 22(%rax)
; AVX2-NEXT:  LBB58_46: ## %else44
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_48
; AVX2-NEXT:  ## BB#47: ## %cond.store45
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $7, %xmm2, 23(%rax)
; AVX2-NEXT:  LBB58_48: ## %else46
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_50
; AVX2-NEXT:  ## BB#49: ## %cond.store47
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $8, %xmm2, 24(%rax)
; AVX2-NEXT:  LBB58_50: ## %else48
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_52
; AVX2-NEXT:  ## BB#51: ## %cond.store49
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $9, %xmm2, 25(%rax)
; AVX2-NEXT:  LBB58_52: ## %else50
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_54
; AVX2-NEXT:  ## BB#53: ## %cond.store51
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $10, %xmm2, 26(%rax)
; AVX2-NEXT:  LBB58_54: ## %else52
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_56
; AVX2-NEXT:  ## BB#55: ## %cond.store53
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $11, %xmm2, 27(%rax)
; AVX2-NEXT:  LBB58_56: ## %else54
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_58
; AVX2-NEXT:  ## BB#57: ## %cond.store55
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $12, %xmm2, 28(%rax)
; AVX2-NEXT:  LBB58_58: ## %else56
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_60
; AVX2-NEXT:  ## BB#59: ## %cond.store57
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $13, %xmm2, 29(%rax)
; AVX2-NEXT:  LBB58_60: ## %else58
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_62
; AVX2-NEXT:  ## BB#61: ## %cond.store59
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $14, %xmm2, 30(%rax)
; AVX2-NEXT:  LBB58_62: ## %else60
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_64
; AVX2-NEXT:  ## BB#63: ## %cond.store61
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpextrb $15, %xmm0, 31(%rax)
; AVX2-NEXT:  LBB58_64: ## %else62
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_66
; AVX2-NEXT:  ## BB#65: ## %cond.store63
; AVX2-NEXT:    vpextrb $0, %xmm1, 32(%rax)
; AVX2-NEXT:  LBB58_66: ## %else64
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_68
; AVX2-NEXT:  ## BB#67: ## %cond.store65
; AVX2-NEXT:    vpextrb $1, %xmm1, 33(%rax)
; AVX2-NEXT:  LBB58_68: ## %else66
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_70
; AVX2-NEXT:  ## BB#69: ## %cond.store67
; AVX2-NEXT:    vpextrb $2, %xmm1, 34(%rax)
; AVX2-NEXT:  LBB58_70: ## %else68
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_72
; AVX2-NEXT:  ## BB#71: ## %cond.store69
; AVX2-NEXT:    vpextrb $3, %xmm1, 35(%rax)
; AVX2-NEXT:  LBB58_72: ## %else70
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_74
; AVX2-NEXT:  ## BB#73: ## %cond.store71
; AVX2-NEXT:    vpextrb $4, %xmm1, 36(%rax)
; AVX2-NEXT:  LBB58_74: ## %else72
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_76
; AVX2-NEXT:  ## BB#75: ## %cond.store73
; AVX2-NEXT:    vpextrb $5, %xmm1, 37(%rax)
; AVX2-NEXT:  LBB58_76: ## %else74
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_78
; AVX2-NEXT:  ## BB#77: ## %cond.store75
; AVX2-NEXT:    vpextrb $6, %xmm1, 38(%rax)
; AVX2-NEXT:  LBB58_78: ## %else76
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_80
; AVX2-NEXT:  ## BB#79: ## %cond.store77
; AVX2-NEXT:    vpextrb $7, %xmm1, 39(%rax)
; AVX2-NEXT:  LBB58_80: ## %else78
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_82
; AVX2-NEXT:  ## BB#81: ## %cond.store79
; AVX2-NEXT:    vpextrb $8, %xmm1, 40(%rax)
; AVX2-NEXT:  LBB58_82: ## %else80
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_84
; AVX2-NEXT:  ## BB#83: ## %cond.store81
; AVX2-NEXT:    vpextrb $9, %xmm1, 41(%rax)
; AVX2-NEXT:  LBB58_84: ## %else82
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_86
; AVX2-NEXT:  ## BB#85: ## %cond.store83
; AVX2-NEXT:    vpextrb $10, %xmm1, 42(%rax)
; AVX2-NEXT:  LBB58_86: ## %else84
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_88
; AVX2-NEXT:  ## BB#87: ## %cond.store85
; AVX2-NEXT:    vpextrb $11, %xmm1, 43(%rax)
; AVX2-NEXT:  LBB58_88: ## %else86
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_90
; AVX2-NEXT:  ## BB#89: ## %cond.store87
; AVX2-NEXT:    vpextrb $12, %xmm1, 44(%rax)
; AVX2-NEXT:  LBB58_90: ## %else88
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_92
; AVX2-NEXT:  ## BB#91: ## %cond.store89
; AVX2-NEXT:    vpextrb $13, %xmm1, 45(%rax)
; AVX2-NEXT:  LBB58_92: ## %else90
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_94
; AVX2-NEXT:  ## BB#93: ## %cond.store91
; AVX2-NEXT:    vpextrb $14, %xmm1, 46(%rax)
; AVX2-NEXT:  LBB58_94: ## %else92
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_96
; AVX2-NEXT:  ## BB#95: ## %cond.store93
; AVX2-NEXT:    vpextrb $15, %xmm1, 47(%rax)
; AVX2-NEXT:  LBB58_96: ## %else94
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_98
; AVX2-NEXT:  ## BB#97: ## %cond.store95
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, 48(%rax)
; AVX2-NEXT:  LBB58_98: ## %else96
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_100
; AVX2-NEXT:  ## BB#99: ## %cond.store97
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $1, %xmm0, 49(%rax)
; AVX2-NEXT:  LBB58_100: ## %else98
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_102
; AVX2-NEXT:  ## BB#101: ## %cond.store99
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $2, %xmm0, 50(%rax)
; AVX2-NEXT:  LBB58_102: ## %else100
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_104
; AVX2-NEXT:  ## BB#103: ## %cond.store101
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $3, %xmm0, 51(%rax)
; AVX2-NEXT:  LBB58_104: ## %else102
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_106
; AVX2-NEXT:  ## BB#105: ## %cond.store103
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $4, %xmm0, 52(%rax)
; AVX2-NEXT:  LBB58_106: ## %else104
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_108
; AVX2-NEXT:  ## BB#107: ## %cond.store105
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $5, %xmm0, 53(%rax)
; AVX2-NEXT:  LBB58_108: ## %else106
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_110
; AVX2-NEXT:  ## BB#109: ## %cond.store107
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $6, %xmm0, 54(%rax)
; AVX2-NEXT:  LBB58_110: ## %else108
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_112
; AVX2-NEXT:  ## BB#111: ## %cond.store109
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $7, %xmm0, 55(%rax)
; AVX2-NEXT:  LBB58_112: ## %else110
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_114
; AVX2-NEXT:  ## BB#113: ## %cond.store111
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $8, %xmm0, 56(%rax)
; AVX2-NEXT:  LBB58_114: ## %else112
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_116
; AVX2-NEXT:  ## BB#115: ## %cond.store113
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $9, %xmm0, 57(%rax)
; AVX2-NEXT:  LBB58_116: ## %else114
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_118
; AVX2-NEXT:  ## BB#117: ## %cond.store115
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $10, %xmm0, 58(%rax)
; AVX2-NEXT:  LBB58_118: ## %else116
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_120
; AVX2-NEXT:  ## BB#119: ## %cond.store117
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $11, %xmm0, 59(%rax)
; AVX2-NEXT:  LBB58_120: ## %else118
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_122
; AVX2-NEXT:  ## BB#121: ## %cond.store119
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $12, %xmm0, 60(%rax)
; AVX2-NEXT:  LBB58_122: ## %else120
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_124
; AVX2-NEXT:  ## BB#123: ## %cond.store121
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $13, %xmm0, 61(%rax)
; AVX2-NEXT:  LBB58_124: ## %else122
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_126
; AVX2-NEXT:  ## BB#125: ## %cond.store123
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $14, %xmm0, 62(%rax)
; AVX2-NEXT:  LBB58_126: ## %else124
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_128
; AVX2-NEXT:  ## BB#127: ## %cond.store125
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $15, %xmm0, 63(%rax)
; AVX2-NEXT:  LBB58_128: ## %else126
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_store_64xi8:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_2
; AVX512F-NEXT:  ## BB#1: ## %cond.store
; AVX512F-NEXT:    vpextrb $0, %xmm4, (%rdi)
; AVX512F-NEXT:  LBB58_2: ## %else
; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_4
; AVX512F-NEXT:  ## BB#3: ## %cond.store1
; AVX512F-NEXT:    vpextrb $1, %xmm4, 1(%rdi)
; AVX512F-NEXT:  LBB58_4: ## %else2
; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_6
; AVX512F-NEXT:  ## BB#5: ## %cond.store3
; AVX512F-NEXT:    vpextrb $2, %xmm4, 2(%rdi)
; AVX512F-NEXT:  LBB58_6: ## %else4
; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_8
; AVX512F-NEXT:  ## BB#7: ## %cond.store5
; AVX512F-NEXT:    vpextrb $3, %xmm4, 3(%rdi)
; AVX512F-NEXT:  LBB58_8: ## %else6
; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_10
; AVX512F-NEXT:  ## BB#9: ## %cond.store7
; AVX512F-NEXT:    vpextrb $4, %xmm4, 4(%rdi)
; AVX512F-NEXT:  LBB58_10: ## %else8
; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_12
; AVX512F-NEXT:  ## BB#11: ## %cond.store9
; AVX512F-NEXT:    vpextrb $5, %xmm4, 5(%rdi)
; AVX512F-NEXT:  LBB58_12: ## %else10
; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_14
; AVX512F-NEXT:  ## BB#13: ## %cond.store11
; AVX512F-NEXT:    vpextrb $6, %xmm4, 6(%rdi)
; AVX512F-NEXT:  LBB58_14: ## %else12
; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_16
; AVX512F-NEXT:  ## BB#15: ## %cond.store13
; AVX512F-NEXT:    vpextrb $7, %xmm4, 7(%rdi)
; AVX512F-NEXT:  LBB58_16: ## %else14
; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_18
; AVX512F-NEXT:  ## BB#17: ## %cond.store15
; AVX512F-NEXT:    vpextrb $8, %xmm4, 8(%rdi)
; AVX512F-NEXT:  LBB58_18: ## %else16
; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_20
; AVX512F-NEXT:  ## BB#19: ## %cond.store17
; AVX512F-NEXT:    vpextrb $9, %xmm4, 9(%rdi)
; AVX512F-NEXT:  LBB58_20: ## %else18
; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_22
; AVX512F-NEXT:  ## BB#21: ## %cond.store19
; AVX512F-NEXT:    vpextrb $10, %xmm4, 10(%rdi)
; AVX512F-NEXT:  LBB58_22: ## %else20
; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_24
; AVX512F-NEXT:  ## BB#23: ## %cond.store21
; AVX512F-NEXT:    vpextrb $11, %xmm4, 11(%rdi)
; AVX512F-NEXT:  LBB58_24: ## %else22
; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_26
; AVX512F-NEXT:  ## BB#25: ## %cond.store23
; AVX512F-NEXT:    vpextrb $12, %xmm4, 12(%rdi)
; AVX512F-NEXT:  LBB58_26: ## %else24
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm0
; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_28
; AVX512F-NEXT:  ## BB#27: ## %cond.store25
; AVX512F-NEXT:    vpextrb $13, %xmm4, 13(%rdi)
; AVX512F-NEXT:  LBB58_28: ## %else26
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_30
; AVX512F-NEXT:  ## BB#29: ## %cond.store27
; AVX512F-NEXT:    vpextrb $14, %xmm4, 14(%rdi)
; AVX512F-NEXT:  LBB58_30: ## %else28
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_32
; AVX512F-NEXT:  ## BB#31: ## %cond.store29
; AVX512F-NEXT:    vpextrb $15, %xmm4, 15(%rdi)
; AVX512F-NEXT:  LBB58_32: ## %else30
; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_34
; AVX512F-NEXT:  ## BB#33: ## %cond.store31
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
; AVX512F-NEXT:  LBB58_34: ## %else32
; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_36
; AVX512F-NEXT:  ## BB#35: ## %cond.store33
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
; AVX512F-NEXT:  LBB58_36: ## %else34
; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_38
; AVX512F-NEXT:  ## BB#37: ## %cond.store35
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
; AVX512F-NEXT:  LBB58_38: ## %else36
; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_40
; AVX512F-NEXT:  ## BB#39: ## %cond.store37
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
; AVX512F-NEXT:  LBB58_40: ## %else38
; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_42
; AVX512F-NEXT:  ## BB#41: ## %cond.store39
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
; AVX512F-NEXT:  LBB58_42: ## %else40
; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_44
; AVX512F-NEXT:  ## BB#43: ## %cond.store41
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
; AVX512F-NEXT:  LBB58_44: ## %else42
; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_46
; AVX512F-NEXT:  ## BB#45: ## %cond.store43
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
; AVX512F-NEXT:  LBB58_46: ## %else44
; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_48
; AVX512F-NEXT:  ## BB#47: ## %cond.store45
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
; AVX512F-NEXT:  LBB58_48: ## %else46
; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_50
; AVX512F-NEXT:  ## BB#49: ## %cond.store47
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
; AVX512F-NEXT:  LBB58_50: ## %else48
; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_52
; AVX512F-NEXT:  ## BB#51: ## %cond.store49
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
; AVX512F-NEXT:  LBB58_52: ## %else50
; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_54
; AVX512F-NEXT:  ## BB#53: ## %cond.store51
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
; AVX512F-NEXT:  LBB58_54: ## %else52
; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_56
; AVX512F-NEXT:  ## BB#55: ## %cond.store53
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
; AVX512F-NEXT:  LBB58_56: ## %else54
; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_58
; AVX512F-NEXT:  ## BB#57: ## %cond.store55
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
; AVX512F-NEXT:  LBB58_58: ## %else56
; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm0
; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_60
; AVX512F-NEXT:  ## BB#59: ## %cond.store57
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm1
; AVX512F-NEXT:    vpextrb $13, %xmm1, 29(%rdi)
; AVX512F-NEXT:  LBB58_60: ## %else58
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_62
; AVX512F-NEXT:  ## BB#61: ## %cond.store59
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm1
; AVX512F-NEXT:    vpextrb $14, %xmm1, 30(%rdi)
; AVX512F-NEXT:  LBB58_62: ## %else60
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $0, %k1, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_64
; AVX512F-NEXT:  ## BB#63: ## %cond.store61
; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
; AVX512F-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
; AVX512F-NEXT:  LBB58_64: ## %else62
; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_66
; AVX512F-NEXT:  ## BB#65: ## %cond.store63
; AVX512F-NEXT:    vpextrb $0, %xmm5, 32(%rdi)
; AVX512F-NEXT:  LBB58_66: ## %else64
; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_68
; AVX512F-NEXT:  ## BB#67: ## %cond.store65
; AVX512F-NEXT:    vpextrb $1, %xmm5, 33(%rdi)
; AVX512F-NEXT:  LBB58_68: ## %else66
; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_70
; AVX512F-NEXT:  ## BB#69: ## %cond.store67
; AVX512F-NEXT:    vpextrb $2, %xmm5, 34(%rdi)
; AVX512F-NEXT:  LBB58_70: ## %else68
; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_72
; AVX512F-NEXT:  ## BB#71: ## %cond.store69
; AVX512F-NEXT:    vpextrb $3, %xmm5, 35(%rdi)
; AVX512F-NEXT:  LBB58_72: ## %else70
; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_74
; AVX512F-NEXT:  ## BB#73: ## %cond.store71
; AVX512F-NEXT:    vpextrb $4, %xmm5, 36(%rdi)
; AVX512F-NEXT:  LBB58_74: ## %else72
; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_76
; AVX512F-NEXT:  ## BB#75: ## %cond.store73
; AVX512F-NEXT:    vpextrb $5, %xmm5, 37(%rdi)
; AVX512F-NEXT:  LBB58_76: ## %else74
; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_78
; AVX512F-NEXT:  ## BB#77: ## %cond.store75
; AVX512F-NEXT:    vpextrb $6, %xmm5, 38(%rdi)
; AVX512F-NEXT:  LBB58_78: ## %else76
; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_80
; AVX512F-NEXT:  ## BB#79: ## %cond.store77
; AVX512F-NEXT:    vpextrb $7, %xmm5, 39(%rdi)
; AVX512F-NEXT:  LBB58_80: ## %else78
; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_82
; AVX512F-NEXT:  ## BB#81: ## %cond.store79
; AVX512F-NEXT:    vpextrb $8, %xmm5, 40(%rdi)
; AVX512F-NEXT:  LBB58_82: ## %else80
; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_84
; AVX512F-NEXT:  ## BB#83: ## %cond.store81
; AVX512F-NEXT:    vpextrb $9, %xmm5, 41(%rdi)
; AVX512F-NEXT:  LBB58_84: ## %else82
; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_86
; AVX512F-NEXT:  ## BB#85: ## %cond.store83
; AVX512F-NEXT:    vpextrb $10, %xmm5, 42(%rdi)
; AVX512F-NEXT:  LBB58_86: ## %else84
; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_88
; AVX512F-NEXT:  ## BB#87: ## %cond.store85
; AVX512F-NEXT:    vpextrb $11, %xmm5, 43(%rdi)
; AVX512F-NEXT:  LBB58_88: ## %else86
; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_90
; AVX512F-NEXT:  ## BB#89: ## %cond.store87
; AVX512F-NEXT:    vpextrb $12, %xmm5, 44(%rdi)
; AVX512F-NEXT:  LBB58_90: ## %else88
; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm0
; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_92
; AVX512F-NEXT:  ## BB#91: ## %cond.store89
; AVX512F-NEXT:    vpextrb $13, %xmm5, 45(%rdi)
; AVX512F-NEXT:  LBB58_92: ## %else90
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_94
; AVX512F-NEXT:  ## BB#93: ## %cond.store91
; AVX512F-NEXT:    vpextrb $14, %xmm5, 46(%rdi)
; AVX512F-NEXT:  LBB58_94: ## %else92
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_96
; AVX512F-NEXT:  ## BB#95: ## %cond.store93
; AVX512F-NEXT:    vpextrb $15, %xmm5, 47(%rdi)
; AVX512F-NEXT:  LBB58_96: ## %else94
; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_98
; AVX512F-NEXT:  ## BB#97: ## %cond.store95
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $0, %xmm0, 48(%rdi)
; AVX512F-NEXT:  LBB58_98: ## %else96
; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_100
; AVX512F-NEXT:  ## BB#99: ## %cond.store97
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $1, %xmm0, 49(%rdi)
; AVX512F-NEXT:  LBB58_100: ## %else98
; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_102
; AVX512F-NEXT:  ## BB#101: ## %cond.store99
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $2, %xmm0, 50(%rdi)
; AVX512F-NEXT:  LBB58_102: ## %else100
; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_104
; AVX512F-NEXT:  ## BB#103: ## %cond.store101
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $3, %xmm0, 51(%rdi)
; AVX512F-NEXT:  LBB58_104: ## %else102
; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_106
; AVX512F-NEXT:  ## BB#105: ## %cond.store103
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $4, %xmm0, 52(%rdi)
; AVX512F-NEXT:  LBB58_106: ## %else104
; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_108
; AVX512F-NEXT:  ## BB#107: ## %cond.store105
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $5, %xmm0, 53(%rdi)
; AVX512F-NEXT:  LBB58_108: ## %else106
; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_110
; AVX512F-NEXT:  ## BB#109: ## %cond.store107
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $6, %xmm0, 54(%rdi)
; AVX512F-NEXT:  LBB58_110: ## %else108
; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_112
; AVX512F-NEXT:  ## BB#111: ## %cond.store109
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $7, %xmm0, 55(%rdi)
; AVX512F-NEXT:  LBB58_112: ## %else110
; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_114
; AVX512F-NEXT:  ## BB#113: ## %cond.store111
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $8, %xmm0, 56(%rdi)
; AVX512F-NEXT:  LBB58_114: ## %else112
; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_116
; AVX512F-NEXT:  ## BB#115: ## %cond.store113
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $9, %xmm0, 57(%rdi)
; AVX512F-NEXT:  LBB58_116: ## %else114
; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_118
; AVX512F-NEXT:  ## BB#117: ## %cond.store115
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $10, %xmm0, 58(%rdi)
; AVX512F-NEXT:  LBB58_118: ## %else116
; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_120
; AVX512F-NEXT:  ## BB#119: ## %cond.store117
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $11, %xmm0, 59(%rdi)
; AVX512F-NEXT:  LBB58_120: ## %else118
; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_122
; AVX512F-NEXT:  ## BB#121: ## %cond.store119
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $12, %xmm0, 60(%rdi)
; AVX512F-NEXT:  LBB58_122: ## %else120
; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_124
; AVX512F-NEXT:  ## BB#123: ## %cond.store121
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $13, %xmm0, 61(%rdi)
; AVX512F-NEXT:  LBB58_124: ## %else122
; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_126
; AVX512F-NEXT:  ## BB#125: ## %cond.store123
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $14, %xmm0, 62(%rdi)
; AVX512F-NEXT:  LBB58_126: ## %else124
; AVX512F-NEXT:    kshiftlw $0, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_128
; AVX512F-NEXT:  ## BB#127: ## %cond.store125
; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
; AVX512F-NEXT:    vpextrb $15, %xmm0, 63(%rdi)
; AVX512F-NEXT:  LBB58_128: ## %else126
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_store_64xi8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0
; SKX-NEXT:    vpmovb2m %zmm0, %k1
; SKX-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> %val, <64 x i8>* %addr, i32 4, <64 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>)
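
; Note: with only AVX512F there is no byte-granularity masked move (vmovdqu8
; is an AVX512BW instruction), so the <64 x i8> masked store above is
; scalarized: each mask bit is isolated with a kshiftlw/kshiftrw pair, moved
; to a GPR, tested, and a single vpextrb store is branched over per element.
; On SKX (avx512bw,avx512vl) the whole intrinsic folds to one masked vmovdqu8.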

define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; AVX-LABEL: test_mask_store_8xi16:
; AVX:       ## BB#0:
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB59_2
; AVX-NEXT:  ## BB#1: ## %cond.store
; AVX-NEXT:    vmovd %xmm1, %eax
; AVX-NEXT:    movw %ax, (%rdi)
; AVX-NEXT:  LBB59_2: ## %else
; AVX-NEXT:    vpextrb $2, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB59_4
; AVX-NEXT:  ## BB#3: ## %cond.store1
; AVX-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
; AVX-NEXT:  LBB59_4: ## %else2
; AVX-NEXT:    vpextrb $4, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB59_6
; AVX-NEXT:  ## BB#5: ## %cond.store3
; AVX-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
; AVX-NEXT:  LBB59_6: ## %else4
; AVX-NEXT:    vpextrb $6, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB59_8
; AVX-NEXT:  ## BB#7: ## %cond.store5
; AVX-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
; AVX-NEXT:  LBB59_8: ## %else6
; AVX-NEXT:    vpextrb $8, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB59_10
; AVX-NEXT:  ## BB#9: ## %cond.store7
; AVX-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
; AVX-NEXT:  LBB59_10: ## %else8
; AVX-NEXT:    vpextrb $10, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB59_12
; AVX-NEXT:  ## BB#11: ## %cond.store9
; AVX-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
; AVX-NEXT:  LBB59_12: ## %else10
; AVX-NEXT:    vpextrb $12, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB59_14
; AVX-NEXT:  ## BB#13: ## %cond.store11
; AVX-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
; AVX-NEXT:  LBB59_14: ## %else12
; AVX-NEXT:    vpextrb $14, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB59_16
; AVX-NEXT:  ## BB#15: ## %cond.store13
; AVX-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
; AVX-NEXT:  LBB59_16: ## %else14
; AVX-NEXT:    retq
;
; AVX512F-LABEL: test_mask_store_8xi16:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB59_2
; AVX512F-NEXT:  ## BB#1: ## %cond.store
; AVX512F-NEXT:    vmovd %xmm1, %eax
; AVX512F-NEXT:    movw %ax, (%rdi)
; AVX512F-NEXT:  LBB59_2: ## %else
; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB59_4
; AVX512F-NEXT:  ## BB#3: ## %cond.store1
; AVX512F-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
; AVX512F-NEXT:  LBB59_4: ## %else2
; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB59_6
; AVX512F-NEXT:  ## BB#5: ## %cond.store3
; AVX512F-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
; AVX512F-NEXT:  LBB59_6: ## %else4
; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB59_8
; AVX512F-NEXT:  ## BB#7: ## %cond.store5
; AVX512F-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
; AVX512F-NEXT:  LBB59_8: ## %else6
; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB59_10
; AVX512F-NEXT:  ## BB#9: ## %cond.store7
; AVX512F-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
; AVX512F-NEXT:  LBB59_10: ## %else8
; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB59_12
; AVX512F-NEXT:  ## BB#11: ## %cond.store9
; AVX512F-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
; AVX512F-NEXT:  LBB59_12: ## %else10
; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB59_14
; AVX512F-NEXT:  ## BB#13: ## %cond.store11
; AVX512F-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
; AVX512F-NEXT:  LBB59_14: ## %else12
; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB59_16
; AVX512F-NEXT:  ## BB#15: ## %cond.store13
; AVX512F-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
; AVX512F-NEXT:  LBB59_16: ## %else14
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_store_8xi16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vmovdqu16 %xmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
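
; The i16 variants follow the same pattern: vmovdqu16 also requires AVX512BW,
; so the plain AVX lowering tests one mask byte at a time with vpextrb and
; branches around a vpextrw store, and the AVX512F lowering first widens the
; <8 x i1> mask (vpmovsxwq + vpsllq + vptestmq) into a k-register before doing
; the same per-element test-and-branch.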

define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; AVX1-LABEL: test_mask_store_16xi16:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_2
; AVX1-NEXT:  ## BB#1: ## %cond.store
; AVX1-NEXT:    vmovd %xmm1, %eax
; AVX1-NEXT:    movw %ax, (%rdi)
; AVX1-NEXT:  LBB60_2: ## %else
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_4
; AVX1-NEXT:  ## BB#3: ## %cond.store1
; AVX1-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
; AVX1-NEXT:  LBB60_4: ## %else2
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_6
; AVX1-NEXT:  ## BB#5: ## %cond.store3
; AVX1-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
; AVX1-NEXT:  LBB60_6: ## %else4
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_8
; AVX1-NEXT:  ## BB#7: ## %cond.store5
; AVX1-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
; AVX1-NEXT:  LBB60_8: ## %else6
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_10
; AVX1-NEXT:  ## BB#9: ## %cond.store7
; AVX1-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
; AVX1-NEXT:  LBB60_10: ## %else8
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_12
; AVX1-NEXT:  ## BB#11: ## %cond.store9
; AVX1-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
; AVX1-NEXT:  LBB60_12: ## %else10
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_14
; AVX1-NEXT:  ## BB#13: ## %cond.store11
; AVX1-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
; AVX1-NEXT:  LBB60_14: ## %else12
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_16
; AVX1-NEXT:  ## BB#15: ## %cond.store13
; AVX1-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
; AVX1-NEXT:  LBB60_16: ## %else14
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_18
; AVX1-NEXT:  ## BB#17: ## %cond.store15
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovd %xmm2, %eax
; AVX1-NEXT:    movw %ax, 16(%rdi)
; AVX1-NEXT:  LBB60_18: ## %else16
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_20
; AVX1-NEXT:  ## BB#19: ## %cond.store17
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrw $1, %xmm2, 18(%rdi)
; AVX1-NEXT:  LBB60_20: ## %else18
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_22
; AVX1-NEXT:  ## BB#21: ## %cond.store19
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrw $2, %xmm2, 20(%rdi)
; AVX1-NEXT:  LBB60_22: ## %else20
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_24
; AVX1-NEXT:  ## BB#23: ## %cond.store21
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrw $3, %xmm2, 22(%rdi)
; AVX1-NEXT:  LBB60_24: ## %else22
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_26
; AVX1-NEXT:  ## BB#25: ## %cond.store23
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrw $4, %xmm2, 24(%rdi)
; AVX1-NEXT:  LBB60_26: ## %else24
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_28
; AVX1-NEXT:  ## BB#27: ## %cond.store25
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrw $5, %xmm2, 26(%rdi)
; AVX1-NEXT:  LBB60_28: ## %else26
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_30
; AVX1-NEXT:  ## BB#29: ## %cond.store27
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
; AVX1-NEXT:  LBB60_30: ## %else28
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB60_32
; AVX1-NEXT:  ## BB#31: ## %cond.store29
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
; AVX1-NEXT:  LBB60_32: ## %else30
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_store_16xi16:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_2
; AVX2-NEXT:  ## BB#1: ## %cond.store
; AVX2-NEXT:    vmovd %xmm1, %eax
; AVX2-NEXT:    movw %ax, (%rdi)
; AVX2-NEXT:  LBB60_2: ## %else
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_4
; AVX2-NEXT:  ## BB#3: ## %cond.store1
; AVX2-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
; AVX2-NEXT:  LBB60_4: ## %else2
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_6
; AVX2-NEXT:  ## BB#5: ## %cond.store3
; AVX2-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
; AVX2-NEXT:  LBB60_6: ## %else4
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_8
; AVX2-NEXT:  ## BB#7: ## %cond.store5
; AVX2-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
; AVX2-NEXT:  LBB60_8: ## %else6
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_10
; AVX2-NEXT:  ## BB#9: ## %cond.store7
; AVX2-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
; AVX2-NEXT:  LBB60_10: ## %else8
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_12
; AVX2-NEXT:  ## BB#11: ## %cond.store9
; AVX2-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
; AVX2-NEXT:  LBB60_12: ## %else10
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_14
; AVX2-NEXT:  ## BB#13: ## %cond.store11
; AVX2-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
; AVX2-NEXT:  LBB60_14: ## %else12
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_16
; AVX2-NEXT:  ## BB#15: ## %cond.store13
; AVX2-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
; AVX2-NEXT:  LBB60_16: ## %else14
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_18
; AVX2-NEXT:  ## BB#17: ## %cond.store15
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vmovd %xmm2, %eax
; AVX2-NEXT:    movw %ax, 16(%rdi)
; AVX2-NEXT:  LBB60_18: ## %else16
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_20
; AVX2-NEXT:  ## BB#19: ## %cond.store17
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrw $1, %xmm2, 18(%rdi)
; AVX2-NEXT:  LBB60_20: ## %else18
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_22
; AVX2-NEXT:  ## BB#21: ## %cond.store19
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrw $2, %xmm2, 20(%rdi)
; AVX2-NEXT:  LBB60_22: ## %else20
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_24
; AVX2-NEXT:  ## BB#23: ## %cond.store21
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrw $3, %xmm2, 22(%rdi)
; AVX2-NEXT:  LBB60_24: ## %else22
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_26
; AVX2-NEXT:  ## BB#25: ## %cond.store23
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrw $4, %xmm2, 24(%rdi)
; AVX2-NEXT:  LBB60_26: ## %else24
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_28
; AVX2-NEXT:  ## BB#27: ## %cond.store25
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrw $5, %xmm2, 26(%rdi)
; AVX2-NEXT:  LBB60_28: ## %else26
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_30
; AVX2-NEXT:  ## BB#29: ## %cond.store27
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
; AVX2-NEXT:  LBB60_30: ## %else28
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB60_32
; AVX2-NEXT:  ## BB#31: ## %cond.store29
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 | 
						|
; AVX2-NEXT:  LBB60_32: ## %else30
 | 
						|
; AVX2-NEXT:    vzeroupper
 | 
						|
; AVX2-NEXT:    retq
 | 
						|
;
 | 
						|
; AVX512F-LABEL: test_mask_store_16xi16:
 | 
						|
; AVX512F:       ## BB#0:
 | 
						|
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
 | 
						|
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
 | 
						|
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
 | 
						|
; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_2
 | 
						|
; AVX512F-NEXT:  ## BB#1: ## %cond.store
 | 
						|
; AVX512F-NEXT:    vmovd %xmm1, %eax
 | 
						|
; AVX512F-NEXT:    movw %ax, (%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_2: ## %else
 | 
						|
; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_4
 | 
						|
; AVX512F-NEXT:  ## BB#3: ## %cond.store1
 | 
						|
; AVX512F-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_4: ## %else2
 | 
						|
; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_6
 | 
						|
; AVX512F-NEXT:  ## BB#5: ## %cond.store3
 | 
						|
; AVX512F-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_6: ## %else4
 | 
						|
; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_8
 | 
						|
; AVX512F-NEXT:  ## BB#7: ## %cond.store5
 | 
						|
; AVX512F-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_8: ## %else6
 | 
						|
; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_10
 | 
						|
; AVX512F-NEXT:  ## BB#9: ## %cond.store7
 | 
						|
; AVX512F-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_10: ## %else8
 | 
						|
; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_12
 | 
						|
; AVX512F-NEXT:  ## BB#11: ## %cond.store9
 | 
						|
; AVX512F-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_12: ## %else10
 | 
						|
; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_14
 | 
						|
; AVX512F-NEXT:  ## BB#13: ## %cond.store11
 | 
						|
; AVX512F-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_14: ## %else12
 | 
						|
; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_16
 | 
						|
; AVX512F-NEXT:  ## BB#15: ## %cond.store13
 | 
						|
; AVX512F-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_16: ## %else14
 | 
						|
; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_18
 | 
						|
; AVX512F-NEXT:  ## BB#17: ## %cond.store15
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
 | 
						|
; AVX512F-NEXT:    vmovd %xmm0, %eax
 | 
						|
; AVX512F-NEXT:    movw %ax, 16(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_18: ## %else16
 | 
						|
; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_20
 | 
						|
; AVX512F-NEXT:  ## BB#19: ## %cond.store17
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
 | 
						|
; AVX512F-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_20: ## %else18
 | 
						|
; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_22
 | 
						|
; AVX512F-NEXT:  ## BB#21: ## %cond.store19
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
 | 
						|
; AVX512F-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_22: ## %else20
 | 
						|
; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_24
 | 
						|
; AVX512F-NEXT:  ## BB#23: ## %cond.store21
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
 | 
						|
; AVX512F-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_24: ## %else22
 | 
						|
; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_26
 | 
						|
; AVX512F-NEXT:  ## BB#25: ## %cond.store23
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
 | 
						|
; AVX512F-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_26: ## %else24
 | 
						|
; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_28
 | 
						|
; AVX512F-NEXT:  ## BB#27: ## %cond.store25
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
 | 
						|
; AVX512F-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_28: ## %else26
 | 
						|
; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 | 
						|
; AVX512F-NEXT:    kmovw %k1, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_30
 | 
						|
; AVX512F-NEXT:  ## BB#29: ## %cond.store27
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
 | 
						|
; AVX512F-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_30: ## %else28
 | 
						|
; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
 | 
						|
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 | 
						|
; AVX512F-NEXT:    kmovw %k0, %eax
 | 
						|
; AVX512F-NEXT:    testb %al, %al
 | 
						|
; AVX512F-NEXT:    je LBB60_32
 | 
						|
; AVX512F-NEXT:  ## BB#31: ## %cond.store29
 | 
						|
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
 | 
						|
; AVX512F-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 | 
						|
; AVX512F-NEXT:  LBB60_32: ## %else30
 | 
						|
; AVX512F-NEXT:    retq
 | 
						|
;
 | 
						|
; SKX-LABEL: test_mask_store_16xi16:
 | 
						|
; SKX:       ## BB#0:
 | 
						|
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
 | 
						|
; SKX-NEXT:    vpmovb2m %xmm0, %k1
 | 
						|
; SKX-NEXT:    vmovdqu16 %ymm1, (%rdi) {%k1}
 | 
						|
; SKX-NEXT:    retq
 | 
						|
  call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
 | 
						|
  ret void
 | 
						|
}
 | 
						|
declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
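
; Note: the AVX1/AVX2/AVX512F sequences above scalarize this masked store
; because those subtargets have no legal masked store for i16 elements: each
; i1 mask lane is moved to %eax (vpextrb, or kshiftlw/kshiftrw + kmovw on
; AVX512F), tested, and the matching i16 lane is conditionally written with
; vpextrw/movw. Only the SKX run (avx512bw + avx512vl) can select a single
; masked vmovdqu16.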
define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
; AVX1-LABEL: test_mask_store_32xi16:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_2
; AVX1-NEXT:  ## BB#1: ## %cond.store
; AVX1-NEXT:    vmovd %xmm1, %eax
; AVX1-NEXT:    movw %ax, (%rdi)
; AVX1-NEXT:  LBB61_2: ## %else
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_4
; AVX1-NEXT:  ## BB#3: ## %cond.store1
; AVX1-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
; AVX1-NEXT:  LBB61_4: ## %else2
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_6
; AVX1-NEXT:  ## BB#5: ## %cond.store3
; AVX1-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
; AVX1-NEXT:  LBB61_6: ## %else4
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_8
; AVX1-NEXT:  ## BB#7: ## %cond.store5
; AVX1-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
; AVX1-NEXT:  LBB61_8: ## %else6
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_10
; AVX1-NEXT:  ## BB#9: ## %cond.store7
; AVX1-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
; AVX1-NEXT:  LBB61_10: ## %else8
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_12
; AVX1-NEXT:  ## BB#11: ## %cond.store9
; AVX1-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
; AVX1-NEXT:  LBB61_12: ## %else10
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_14
; AVX1-NEXT:  ## BB#13: ## %cond.store11
; AVX1-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
; AVX1-NEXT:  LBB61_14: ## %else12
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_16
; AVX1-NEXT:  ## BB#15: ## %cond.store13
; AVX1-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
; AVX1-NEXT:  LBB61_16: ## %else14
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_18
; AVX1-NEXT:  ## BB#17: ## %cond.store15
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vmovd %xmm3, %eax
; AVX1-NEXT:    movw %ax, 16(%rdi)
; AVX1-NEXT:  LBB61_18: ## %else16
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_20
; AVX1-NEXT:  ## BB#19: ## %cond.store17
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpextrw $1, %xmm3, 18(%rdi)
; AVX1-NEXT:  LBB61_20: ## %else18
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_22
; AVX1-NEXT:  ## BB#21: ## %cond.store19
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpextrw $2, %xmm3, 20(%rdi)
; AVX1-NEXT:  LBB61_22: ## %else20
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_24
; AVX1-NEXT:  ## BB#23: ## %cond.store21
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpextrw $3, %xmm3, 22(%rdi)
; AVX1-NEXT:  LBB61_24: ## %else22
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_26
; AVX1-NEXT:  ## BB#25: ## %cond.store23
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpextrw $4, %xmm3, 24(%rdi)
; AVX1-NEXT:  LBB61_26: ## %else24
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_28
; AVX1-NEXT:  ## BB#27: ## %cond.store25
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpextrw $5, %xmm3, 26(%rdi)
; AVX1-NEXT:  LBB61_28: ## %else26
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_30
; AVX1-NEXT:  ## BB#29: ## %cond.store27
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpextrw $6, %xmm3, 28(%rdi)
; AVX1-NEXT:  LBB61_30: ## %else28
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_32
; AVX1-NEXT:  ## BB#31: ## %cond.store29
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpextrw $7, %xmm1, 30(%rdi)
; AVX1-NEXT:  LBB61_32: ## %else30
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_34
; AVX1-NEXT:  ## BB#33: ## %cond.store31
; AVX1-NEXT:    vmovd %xmm2, %eax
; AVX1-NEXT:    movw %ax, 32(%rdi)
; AVX1-NEXT:  LBB61_34: ## %else32
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_36
; AVX1-NEXT:  ## BB#35: ## %cond.store33
; AVX1-NEXT:    vpextrw $1, %xmm2, 34(%rdi)
; AVX1-NEXT:  LBB61_36: ## %else34
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_38
; AVX1-NEXT:  ## BB#37: ## %cond.store35
; AVX1-NEXT:    vpextrw $2, %xmm2, 36(%rdi)
; AVX1-NEXT:  LBB61_38: ## %else36
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_40
; AVX1-NEXT:  ## BB#39: ## %cond.store37
; AVX1-NEXT:    vpextrw $3, %xmm2, 38(%rdi)
; AVX1-NEXT:  LBB61_40: ## %else38
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_42
; AVX1-NEXT:  ## BB#41: ## %cond.store39
; AVX1-NEXT:    vpextrw $4, %xmm2, 40(%rdi)
; AVX1-NEXT:  LBB61_42: ## %else40
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_44
; AVX1-NEXT:  ## BB#43: ## %cond.store41
; AVX1-NEXT:    vpextrw $5, %xmm2, 42(%rdi)
; AVX1-NEXT:  LBB61_44: ## %else42
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_46
; AVX1-NEXT:  ## BB#45: ## %cond.store43
; AVX1-NEXT:    vpextrw $6, %xmm2, 44(%rdi)
; AVX1-NEXT:  LBB61_46: ## %else44
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_48
; AVX1-NEXT:  ## BB#47: ## %cond.store45
; AVX1-NEXT:    vpextrw $7, %xmm2, 46(%rdi)
; AVX1-NEXT:  LBB61_48: ## %else46
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_50
; AVX1-NEXT:  ## BB#49: ## %cond.store47
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vmovd %xmm1, %eax
; AVX1-NEXT:    movw %ax, 48(%rdi)
; AVX1-NEXT:  LBB61_50: ## %else48
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_52
; AVX1-NEXT:  ## BB#51: ## %cond.store49
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $1, %xmm1, 50(%rdi)
; AVX1-NEXT:  LBB61_52: ## %else50
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_54
; AVX1-NEXT:  ## BB#53: ## %cond.store51
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $2, %xmm1, 52(%rdi)
; AVX1-NEXT:  LBB61_54: ## %else52
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_56
; AVX1-NEXT:  ## BB#55: ## %cond.store53
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $3, %xmm1, 54(%rdi)
; AVX1-NEXT:  LBB61_56: ## %else54
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_58
; AVX1-NEXT:  ## BB#57: ## %cond.store55
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $4, %xmm1, 56(%rdi)
; AVX1-NEXT:  LBB61_58: ## %else56
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_60
; AVX1-NEXT:  ## BB#59: ## %cond.store57
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $5, %xmm1, 58(%rdi)
; AVX1-NEXT:  LBB61_60: ## %else58
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_62
; AVX1-NEXT:  ## BB#61: ## %cond.store59
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $6, %xmm1, 60(%rdi)
; AVX1-NEXT:  LBB61_62: ## %else60
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_64
; AVX1-NEXT:  ## BB#63: ## %cond.store61
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm0
; AVX1-NEXT:    vpextrw $7, %xmm0, 62(%rdi)
; AVX1-NEXT:  LBB61_64: ## %else62
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_store_32xi16:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_2
; AVX2-NEXT:  ## BB#1: ## %cond.store
; AVX2-NEXT:    vmovd %xmm1, %eax
; AVX2-NEXT:    movw %ax, (%rdi)
; AVX2-NEXT:  LBB61_2: ## %else
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_4
; AVX2-NEXT:  ## BB#3: ## %cond.store1
; AVX2-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
; AVX2-NEXT:  LBB61_4: ## %else2
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_6
; AVX2-NEXT:  ## BB#5: ## %cond.store3
; AVX2-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
; AVX2-NEXT:  LBB61_6: ## %else4
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_8
; AVX2-NEXT:  ## BB#7: ## %cond.store5
; AVX2-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
; AVX2-NEXT:  LBB61_8: ## %else6
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_10
; AVX2-NEXT:  ## BB#9: ## %cond.store7
; AVX2-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
; AVX2-NEXT:  LBB61_10: ## %else8
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_12
; AVX2-NEXT:  ## BB#11: ## %cond.store9
; AVX2-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
; AVX2-NEXT:  LBB61_12: ## %else10
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_14
; AVX2-NEXT:  ## BB#13: ## %cond.store11
; AVX2-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
; AVX2-NEXT:  LBB61_14: ## %else12
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_16
; AVX2-NEXT:  ## BB#15: ## %cond.store13
; AVX2-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
; AVX2-NEXT:  LBB61_16: ## %else14
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_18
; AVX2-NEXT:  ## BB#17: ## %cond.store15
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vmovd %xmm3, %eax
; AVX2-NEXT:    movw %ax, 16(%rdi)
; AVX2-NEXT:  LBB61_18: ## %else16
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_20
; AVX2-NEXT:  ## BB#19: ## %cond.store17
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $1, %xmm3, 18(%rdi)
; AVX2-NEXT:  LBB61_20: ## %else18
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_22
; AVX2-NEXT:  ## BB#21: ## %cond.store19
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $2, %xmm3, 20(%rdi)
; AVX2-NEXT:  LBB61_22: ## %else20
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_24
; AVX2-NEXT:  ## BB#23: ## %cond.store21
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $3, %xmm3, 22(%rdi)
; AVX2-NEXT:  LBB61_24: ## %else22
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_26
; AVX2-NEXT:  ## BB#25: ## %cond.store23
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $4, %xmm3, 24(%rdi)
; AVX2-NEXT:  LBB61_26: ## %else24
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_28
; AVX2-NEXT:  ## BB#27: ## %cond.store25
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $5, %xmm3, 26(%rdi)
; AVX2-NEXT:  LBB61_28: ## %else26
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_30
; AVX2-NEXT:  ## BB#29: ## %cond.store27
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $6, %xmm3, 28(%rdi)
; AVX2-NEXT:  LBB61_30: ## %else28
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_32
; AVX2-NEXT:  ## BB#31: ## %cond.store29
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT:    vpextrw $7, %xmm1, 30(%rdi)
; AVX2-NEXT:  LBB61_32: ## %else30
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_34
; AVX2-NEXT:  ## BB#33: ## %cond.store31
; AVX2-NEXT:    vmovd %xmm2, %eax
; AVX2-NEXT:    movw %ax, 32(%rdi)
; AVX2-NEXT:  LBB61_34: ## %else32
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_36
; AVX2-NEXT:  ## BB#35: ## %cond.store33
; AVX2-NEXT:    vpextrw $1, %xmm2, 34(%rdi)
; AVX2-NEXT:  LBB61_36: ## %else34
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_38
; AVX2-NEXT:  ## BB#37: ## %cond.store35
; AVX2-NEXT:    vpextrw $2, %xmm2, 36(%rdi)
; AVX2-NEXT:  LBB61_38: ## %else36
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_40
; AVX2-NEXT:  ## BB#39: ## %cond.store37
; AVX2-NEXT:    vpextrw $3, %xmm2, 38(%rdi)
; AVX2-NEXT:  LBB61_40: ## %else38
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_42
; AVX2-NEXT:  ## BB#41: ## %cond.store39
; AVX2-NEXT:    vpextrw $4, %xmm2, 40(%rdi)
; AVX2-NEXT:  LBB61_42: ## %else40
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_44
; AVX2-NEXT:  ## BB#43: ## %cond.store41
; AVX2-NEXT:    vpextrw $5, %xmm2, 42(%rdi)
; AVX2-NEXT:  LBB61_44: ## %else42
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_46
; AVX2-NEXT:  ## BB#45: ## %cond.store43
; AVX2-NEXT:    vpextrw $6, %xmm2, 44(%rdi)
; AVX2-NEXT:  LBB61_46: ## %else44
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_48
; AVX2-NEXT:  ## BB#47: ## %cond.store45
; AVX2-NEXT:    vpextrw $7, %xmm2, 46(%rdi)
; AVX2-NEXT:  LBB61_48: ## %else46
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_50
; AVX2-NEXT:  ## BB#49: ## %cond.store47
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vmovd %xmm1, %eax
; AVX2-NEXT:    movw %ax, 48(%rdi)
; AVX2-NEXT:  LBB61_50: ## %else48
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_52
; AVX2-NEXT:  ## BB#51: ## %cond.store49
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $1, %xmm1, 50(%rdi)
; AVX2-NEXT:  LBB61_52: ## %else50
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_54
; AVX2-NEXT:  ## BB#53: ## %cond.store51
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $2, %xmm1, 52(%rdi)
; AVX2-NEXT:  LBB61_54: ## %else52
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_56
; AVX2-NEXT:  ## BB#55: ## %cond.store53
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $3, %xmm1, 54(%rdi)
; AVX2-NEXT:  LBB61_56: ## %else54
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_58
; AVX2-NEXT:  ## BB#57: ## %cond.store55
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $4, %xmm1, 56(%rdi)
; AVX2-NEXT:  LBB61_58: ## %else56
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_60
; AVX2-NEXT:  ## BB#59: ## %cond.store57
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $5, %xmm1, 58(%rdi)
; AVX2-NEXT:  LBB61_60: ## %else58
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_62
; AVX2-NEXT:  ## BB#61: ## %cond.store59
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $6, %xmm1, 60(%rdi)
; AVX2-NEXT:  LBB61_62: ## %else60
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_64
; AVX2-NEXT:  ## BB#63: ## %cond.store61
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm0
; AVX2-NEXT:    vpextrw $7, %xmm0, 62(%rdi)
; AVX2-NEXT:  LBB61_64: ## %else62
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_store_32xi16:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_2
; AVX512F-NEXT:  ## BB#1: ## %cond.store
; AVX512F-NEXT:    vmovd %xmm1, %eax
; AVX512F-NEXT:    movw %ax, (%rdi)
; AVX512F-NEXT:  LBB61_2: ## %else
; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_4
; AVX512F-NEXT:  ## BB#3: ## %cond.store1
; AVX512F-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
; AVX512F-NEXT:  LBB61_4: ## %else2
; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_6
; AVX512F-NEXT:  ## BB#5: ## %cond.store3
; AVX512F-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
; AVX512F-NEXT:  LBB61_6: ## %else4
; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_8
; AVX512F-NEXT:  ## BB#7: ## %cond.store5
; AVX512F-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
; AVX512F-NEXT:  LBB61_8: ## %else6
; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_10
; AVX512F-NEXT:  ## BB#9: ## %cond.store7
; AVX512F-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
; AVX512F-NEXT:  LBB61_10: ## %else8
; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_12
; AVX512F-NEXT:  ## BB#11: ## %cond.store9
; AVX512F-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
; AVX512F-NEXT:  LBB61_12: ## %else10
; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_14
; AVX512F-NEXT:  ## BB#13: ## %cond.store11
; AVX512F-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
; AVX512F-NEXT:  LBB61_14: ## %else12
; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_16
; AVX512F-NEXT:  ## BB#15: ## %cond.store13
; AVX512F-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
; AVX512F-NEXT:  LBB61_16: ## %else14
; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_18
; AVX512F-NEXT:  ## BB#17: ## %cond.store15
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vmovd %xmm3, %eax
; AVX512F-NEXT:    movw %ax, 16(%rdi)
; AVX512F-NEXT:  LBB61_18: ## %else16
; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_20
; AVX512F-NEXT:  ## BB#19: ## %cond.store17
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $1, %xmm3, 18(%rdi)
; AVX512F-NEXT:  LBB61_20: ## %else18
; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_22
; AVX512F-NEXT:  ## BB#21: ## %cond.store19
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $2, %xmm3, 20(%rdi)
; AVX512F-NEXT:  LBB61_22: ## %else20
; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_24
; AVX512F-NEXT:  ## BB#23: ## %cond.store21
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $3, %xmm3, 22(%rdi)
; AVX512F-NEXT:  LBB61_24: ## %else22
; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_26
; AVX512F-NEXT:  ## BB#25: ## %cond.store23
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $4, %xmm3, 24(%rdi)
; AVX512F-NEXT:  LBB61_26: ## %else24
; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_28
; AVX512F-NEXT:  ## BB#27: ## %cond.store25
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $5, %xmm3, 26(%rdi)
; AVX512F-NEXT:  LBB61_28: ## %else26
; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_30
; AVX512F-NEXT:  ## BB#29: ## %cond.store27
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $6, %xmm3, 28(%rdi)
; AVX512F-NEXT:  LBB61_30: ## %else28
; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_32
; AVX512F-NEXT:  ## BB#31: ## %cond.store29
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
; AVX512F-NEXT:    vpextrw $7, %xmm1, 30(%rdi)
; AVX512F-NEXT:  LBB61_32: ## %else30
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_34
; AVX512F-NEXT:  ## BB#33: ## %cond.store31
; AVX512F-NEXT:    vmovd %xmm2, %eax
; AVX512F-NEXT:    movw %ax, 32(%rdi)
; AVX512F-NEXT:  LBB61_34: ## %else32
; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_36
; AVX512F-NEXT:  ## BB#35: ## %cond.store33
; AVX512F-NEXT:    vpextrw $1, %xmm2, 34(%rdi)
; AVX512F-NEXT:  LBB61_36: ## %else34
; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_38
; AVX512F-NEXT:  ## BB#37: ## %cond.store35
; AVX512F-NEXT:    vpextrw $2, %xmm2, 36(%rdi)
; AVX512F-NEXT:  LBB61_38: ## %else36
; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_40
; AVX512F-NEXT:  ## BB#39: ## %cond.store37
; AVX512F-NEXT:    vpextrw $3, %xmm2, 38(%rdi)
; AVX512F-NEXT:  LBB61_40: ## %else38
; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_42
; AVX512F-NEXT:  ## BB#41: ## %cond.store39
; AVX512F-NEXT:    vpextrw $4, %xmm2, 40(%rdi)
; AVX512F-NEXT:  LBB61_42: ## %else40
; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_44
; AVX512F-NEXT:  ## BB#43: ## %cond.store41
; AVX512F-NEXT:    vpextrw $5, %xmm2, 42(%rdi)
; AVX512F-NEXT:  LBB61_44: ## %else42
; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_46
; AVX512F-NEXT:  ## BB#45: ## %cond.store43
; AVX512F-NEXT:    vpextrw $6, %xmm2, 44(%rdi)
; AVX512F-NEXT:  LBB61_46: ## %else44
; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_48
; AVX512F-NEXT:  ## BB#47: ## %cond.store45
; AVX512F-NEXT:    vpextrw $7, %xmm2, 46(%rdi)
; AVX512F-NEXT:  LBB61_48: ## %else46
; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_50
; AVX512F-NEXT:  ## BB#49: ## %cond.store47
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vmovd %xmm1, %eax
; AVX512F-NEXT:    movw %ax, 48(%rdi)
; AVX512F-NEXT:  LBB61_50: ## %else48
; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_52
; AVX512F-NEXT:  ## BB#51: ## %cond.store49
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $1, %xmm1, 50(%rdi)
; AVX512F-NEXT:  LBB61_52: ## %else50
; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_54
; AVX512F-NEXT:  ## BB#53: ## %cond.store51
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $2, %xmm1, 52(%rdi)
; AVX512F-NEXT:  LBB61_54: ## %else52
; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_56
; AVX512F-NEXT:  ## BB#55: ## %cond.store53
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $3, %xmm1, 54(%rdi)
; AVX512F-NEXT:  LBB61_56: ## %else54
; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_58
; AVX512F-NEXT:  ## BB#57: ## %cond.store55
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $4, %xmm1, 56(%rdi)
; AVX512F-NEXT:  LBB61_58: ## %else56
; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_60
; AVX512F-NEXT:  ## BB#59: ## %cond.store57
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $5, %xmm1, 58(%rdi)
; AVX512F-NEXT:  LBB61_60: ## %else58
; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_62
; AVX512F-NEXT:  ## BB#61: ## %cond.store59
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $6, %xmm1, 60(%rdi)
; AVX512F-NEXT:  LBB61_62: ## %else60
; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_64
; AVX512F-NEXT:  ## BB#63: ## %cond.store61
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm0
; AVX512F-NEXT:    vpextrw $7, %xmm0, 62(%rdi)
; AVX512F-NEXT:  LBB61_64: ## %else62
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_store_32xi16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k1
; SKX-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> %val, <32 x i16>* %addr, i32 4, <32 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v32i16.p0v32i16(<32 x i16>, <32 x i16>*, i32, <32 x i1>)
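
; Note: the 32 x i16 case repeats the same scalarized pattern across both
; value registers (ymm1, then ymm2 starting at offset 32) and both halves of
; the mask vector (xmm0, then the upper xmm extracted from ymm0), while SKX
; widens to a single vmovdqu16 zmm store under a 32-lane k-mask.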