; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_v2f64_sext: ; SSE: # %bb.0: ; SSE-NEXT: cmpltpd %xmm0, %xmm1 ; SSE-NEXT: movmskpd %xmm1, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2f64_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovmskpd %xmm0, %eax ; AVX-NEXT: negl %eax ; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f64_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: retq %c = fcmp ogt <2 x double> %a0, %a1 %s = sext <2 x i1> %c to <2 x i64> %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> %2 = or <2 x i64> %s, %1 %3 = extractelement <2 x i64> %2, i32 0 ret i64 %3 } define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) { ; SSE-LABEL: test_v4f64_sext: ; SSE: # %bb.0: ; SSE-NEXT: cmpltpd %xmm1, %xmm3 ; SSE-NEXT: cmpltpd %xmm0, %xmm2 ; SSE-NEXT: orpd %xmm3, %xmm2 ; SSE-NEXT: movmskpd %xmm2, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f64_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 ; AVX-NEXT: vmovmskpd %ymm0, %eax ; AVX-NEXT: negl %eax ; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f64_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = fcmp ogt <4 x double> %a0, %a1 %s = sext <4 x i1> %c to <4 x i64> %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> %2 = or <4 x i64> %s, %1 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> %4 = or <4 x i64> %2, %3 %5 = extractelement <4 x i64> %4, i64 0 ret i64 %5 } define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) { ; SSE-LABEL: test_v4f64_legal_sext: ; SSE: # %bb.0: ; SSE-NEXT: cmpltpd %xmm1, %xmm3 ; SSE-NEXT: cmpltpd %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 ; SSE-NEXT: movmskps %xmm2, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f64_legal_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax ; AVX-NEXT: negl %eax ; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f64_legal_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: cltq ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = fcmp ogt <4 x double> %a0, %a1 %s = sext <4 x i1> %c to <4 x i32> %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> %2 = or <4 x i32> %s, %1 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> %4 = or <4 x i32> %2, %3 %5 = extractelement <4 x i32> %4, i64 0 %6 = sext i32 %5 to i64 ret i64 %6 } define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_v4f32_sext: ; SSE: # %bb.0: ; SSE-NEXT: cmpltps %xmm0, %xmm1 ; SSE-NEXT: movmskps %xmm1, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax ; AVX-NEXT: negl %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f32_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq %c = fcmp ogt <4 x float> %a0, %a1 %s = sext <4 x i1> %c to <4 x i32> %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> %2 = or <4 x i32> %s, %1 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> %4 = or <4 x i32> %2, %3 %5 = extractelement <4 x i32> %4, i32 0 ret i32 %5 } define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) { ; SSE-LABEL: test_v8f32_sext: ; SSE: # %bb.0: ; SSE-NEXT: cmpltps %xmm1, %xmm3 ; SSE-NEXT: cmpltps %xmm0, %xmm2 ; SSE-NEXT: orps %xmm3, %xmm2 ; SSE-NEXT: movmskps %xmm2, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8f32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 ; AVX-NEXT: vmovmskps %ymm0, %eax ; AVX-NEXT: negl %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v8f32_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = fcmp ogt <8 x float> %a0, %a1 %s = sext <8 x i1> %c to <8 x i32> %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> %2 = or <8 x i32> %s, %1 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> %4 = or <8 x i32> %2, %3 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> %6 = or <8 x i32> %4, %5 %7 = extractelement <8 x i32> %6, i32 0 ret i32 %7 } define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) { ; SSE-LABEL: test_v8f32_legal_sext: ; SSE: # %bb.0: ; SSE-NEXT: cmpltps %xmm1, %xmm3 ; SSE-NEXT: cmpltps %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 ; SSE-NEXT: pmovmskb %xmm2, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8f32_legal_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax ; AVX-NEXT: negl %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v8f32_legal_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0 ; AVX512-NEXT: vpmovm2w %k0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: cwtl ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = fcmp ogt <8 x float> %a0, %a1 %s = sext <8 x i1> %c to <8 x i16> %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> %2 = or <8 x i16> %s, %1 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> %4 = or <8 x i16> %2, %3 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> %6 = or <8 x i16> %4, %5 %7 = extractelement <8 x i16> %6, i32 0 %8 = sext i16 %7 to i32 ret i32 %8 } define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_v2i64_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtq %xmm1, %xmm0 ; SSE-NEXT: movmskpd %xmm0, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i64_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskpd %xmm0, %eax ; AVX-NEXT: negl %eax ; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2i64_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: retq %c = icmp sgt <2 x i64> %a0, %a1 %s = sext <2 x i1> %c to <2 x i64> %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> %2 = or <2 x i64> %s, %1 %3 = extractelement <2 x i64> %2, i32 0 ret i64 %3 } define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) { ; SSE-LABEL: test_v4i64_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtq %xmm3, %xmm1 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: movmskpd %xmm0, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v4i64_sext: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskpd %ymm0, %eax ; AVX1-NEXT: negl %eax ; AVX1-NEXT: sbbq %rax, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v4i64_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vmovmskpd %ymm0, %eax ; AVX2-NEXT: negl %eax ; AVX2-NEXT: sbbq %rax, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_v4i64_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = icmp sgt <4 x i64> %a0, %a1 %s = sext <4 x i1> %c to <4 x i64> %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> %2 = or <4 x i64> %s, %1 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> %4 = or <4 x i64> %2, %3 %5 = extractelement <4 x i64> %4, i64 0 ret i64 %5 } define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) { ; SSE-LABEL: test_v4i64_legal_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtq %xmm3, %xmm1 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 ; SSE-NEXT: movmskps %xmm0, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v4i64_legal_sext: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovmskps %xmm0, %eax ; AVX1-NEXT: negl %eax ; AVX1-NEXT: sbbq %rax, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v4i64_legal_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovmskps %xmm0, %eax ; AVX2-NEXT: negl %eax ; AVX2-NEXT: sbbq %rax, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_v4i64_legal_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: cltq ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = icmp sgt <4 x i64> %a0, %a1 %s = sext <4 x i1> %c to <4 x i32> %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> %2 = or <4 x i32> %s, %1 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> %4 = or <4 x i32> %2, %3 %5 = extractelement <4 x i32> %4, i64 0 %6 = sext i32 %5 to i64 ret i64 %6 } define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: test_v4i32_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE-NEXT: movmskps %xmm0, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax ; AVX-NEXT: negl %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4i32_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq %c = icmp sgt <4 x i32> %a0, %a1 %s = sext <4 x i1> %c to <4 x i32> %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> %2 = or <4 x i32> %s, %1 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> %4 = or <4 x i32> %2, %3 %5 = extractelement <4 x i32> %4, i32 0 ret i32 %5 } define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) { ; SSE-LABEL: test_v8i32_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtd %xmm3, %xmm1 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: movmskps %xmm0, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v8i32_sext: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: negl %eax ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v8i32_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vmovmskps %ymm0, %eax ; AVX2-NEXT: negl %eax ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_v8i32_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = icmp sgt <8 x i32> %a0, %a1 %s = sext <8 x i1> %c to <8 x i32> %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> %2 = or <8 x i32> %s, %1 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> %4 = or <8 x i32> %2, %3 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> %6 = or <8 x i32> %4, %5 %7 = extractelement <8 x i32> %6, i32 0 ret i32 %7 } define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) { ; SSE-LABEL: test_v8i32_legal_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtd %xmm3, %xmm1 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v8i32_legal_sext: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax ; AVX1-NEXT: negl %eax ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v8i32_legal_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax ; AVX2-NEXT: negl %eax ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_v8i32_legal_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ; AVX512-NEXT: vpmovm2w %k0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: cwtl ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = icmp sgt <8 x i32> %a0, %a1 %s = sext <8 x i1> %c to <8 x i16> %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> %2 = or <8 x i16> %s, %1 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> %4 = or <8 x i16> %2, %3 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> %6 = or <8 x i16> %4, %5 %7 = extractelement <8 x i16> %6, i32 0 %8 = sext i16 %7 to i32 ret i32 %8 } define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: test_v8i16_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8i16_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax ; AVX-NEXT: negl %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v8i16_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq %c = icmp sgt <8 x i16> %a0, %a1 %s = sext <8 x i1> %c to <8 x i16> %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> %2 = or <8 x i16> %s, %1 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> %4 = or <8 x i16> %2, %3 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> %6 = or <8 x i16> %4, %5 %7 = extractelement <8 x i16> %6, i32 0 ret i16 %7 } define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) { ; SSE-LABEL: test_v16i16_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm3, %xmm1 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v16i16_sext: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax ; AVX1-NEXT: negl %eax ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v16i16_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpmovmskb %ymm0, %eax ; AVX2-NEXT: negl %eax ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_v16i16_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = icmp sgt <16 x i16> %a0, %a1 %s = sext <16 x i1> %c to <16 x i16> %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> %2 = or <16 x i16> %s, %1 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> %4 = or <16 x i16> %2, %3 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> %6 = or <16 x i16> %4, %5 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> %8 = or <16 x i16> %6, %7 %9 = extractelement <16 x i16> %8, i32 0 ret i16 %9 } define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) { ; SSE-LABEL: test_v16i16_legal_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm3, %xmm1 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: packsswb %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbw %ax, %ax ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v16i16_legal_sext: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax ; AVX1-NEXT: negl %eax ; AVX1-NEXT: sbbw %ax, %ax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v16i16_legal_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax ; AVX2-NEXT: negl %eax ; AVX2-NEXT: sbbw %ax, %ax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_v16i16_legal_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ; AVX512-NEXT: vpmovm2b %k0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax ; AVX512-NEXT: movsbl %al, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = icmp sgt <16 x i16> %a0, %a1 %s = sext <16 x i1> %c to <16 x i8> %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> %2 = or <16 x i8> %s, %1 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> %4 = or <16 x i8> %2, %3 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> %6 = or <16 x i8> %4, %5 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> %8 = or <16 x i8> %6, %7 %9 = extractelement <16 x i8> %8, i32 0 %10 = sext i8 %9 to i16 ret i16 %10 } define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) { ; SSE-LABEL: test_v16i8_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtb %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbb %al, %al ; SSE-NEXT: retq ; ; AVX-LABEL: test_v16i8_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax ; AVX-NEXT: negl %eax ; AVX-NEXT: sbbb %al, %al ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16i8_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %c = icmp sgt <16 x i8> %a0, %a1 %s = sext <16 x i1> %c to <16 x i8> %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> %2 = or <16 x i8> %s, %1 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> %4 = or <16 x i8> %2, %3 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> %6 = or <16 x i8> %4, %5 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> %8 = or <16 x i8> %6, %7 %9 = extractelement <16 x i8> %8, i32 0 ret i8 %9 } define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) { ; SSE-LABEL: test_v32i8_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtb %xmm3, %xmm1 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: negl %eax ; SSE-NEXT: sbbb %al, %al ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v32i8_sext: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax ; AVX1-NEXT: negl %eax ; AVX1-NEXT: sbbb %al, %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v32i8_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpmovmskb %ymm0, %eax ; AVX2-NEXT: negl %eax ; AVX2-NEXT: sbbb %al, %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_v32i8_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = icmp sgt <32 x i8> %a0, %a1 %s = sext <32 x i1> %c to <32 x i8> %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> %2 = or <32 x i8> %s, %1 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> %4 = or <32 x i8> %2, %3 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> %6 = or <32 x i8> %4, %5 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> %8 = or <32 x i8> %6, %7 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> %10 = or <32 x i8> %8, %9 %11 = extractelement <32 x i8> %10, i32 0 ret i8 %11 } define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) { ; SSE-LABEL: bool_reduction_v2f64: ; SSE: # %bb.0: ; SSE-NEXT: cmpltpd %xmm0, %xmm1 ; SSE-NEXT: movmskpd %xmm1, %eax ; SSE-NEXT: testb %al, %al ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX-LABEL: bool_reduction_v2f64: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovmskpd %xmm0, %eax ; AVX-NEXT: testb %al, %al ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v2f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: testb $3, %al ; AVX512-NEXT: setne %al ; AVX512-NEXT: retq %a = fcmp ogt <2 x double> %x, %y %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> %c = or <2 x i1> %a, %b %d = extractelement <2 x i1> %c, i32 0 ret i1 %d } define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) { ; SSE-LABEL: bool_reduction_v4f32: ; SSE: # %bb.0: ; SSE-NEXT: cmpeqps %xmm1, %xmm0 ; SSE-NEXT: movmskps %xmm0, %eax ; SSE-NEXT: testb %al, %al ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX-LABEL: bool_reduction_v4f32: ; AVX: # %bb.0: ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax ; AVX-NEXT: testb %al, %al ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v4f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: testb $15, %al ; AVX512-NEXT: setne %al ; AVX512-NEXT: retq %a = fcmp oeq <4 x float> %x, %y %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> %b = or <4 x i1> %s1, %a %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> %c = or <4 x i1> %s2, %b %d = extractelement <4 x i1> %c, i32 0 ret i1 %d } define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) { ; SSE-LABEL: bool_reduction_v4f64: ; SSE: # %bb.0: ; SSE-NEXT: cmplepd %xmm1, %xmm3 ; SSE-NEXT: cmplepd %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 ; SSE-NEXT: movmskps %xmm2, %eax ; SSE-NEXT: testb %al, %al ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX-LABEL: bool_reduction_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 ; AVX-NEXT: vmovmskpd %ymm0, %eax ; AVX-NEXT: testb %al, %al ; AVX-NEXT: setne %al ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v4f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: testb $15, %al ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %a = fcmp oge <4 x double> %x, %y %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> %b = or <4 x i1> %s1, %a %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> %c = or <4 x i1> %s2, %b %d = extractelement <4 x i1> %c, i32 0 ret i1 %d } define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) { ; SSE-LABEL: bool_reduction_v8f32: ; SSE: # %bb.0: ; SSE-NEXT: cmpneqps %xmm3, %xmm1 ; SSE-NEXT: cmpneqps %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 ; SSE-NEXT: packsswb %xmm0, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: testb %al, %al ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX-LABEL: bool_reduction_v8f32: ; AVX: # %bb.0: ; AVX-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vmovmskps %ymm0, %eax ; AVX-NEXT: testb %al, %al ; AVX-NEXT: setne %al ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v8f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: testb %al, %al ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %a = fcmp une <8 x float> %x, %y %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> %b = or <8 x i1> %s1, %a %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> %c = or <8 x i1> %s2, %b %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> %d = or <8 x i1> %s3, %c %e = extractelement <8 x i1> %d, i32 0 ret i1 %e } define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE-LABEL: bool_reduction_v2i64: ; SSE: # %bb.0: ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; SSE-NEXT: pxor %xmm2, %xmm1 ; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: pcmpgtq %xmm1, %xmm0 ; SSE-NEXT: movmskpd %xmm0, %eax ; SSE-NEXT: testb %al, %al ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX-LABEL: bool_reduction_v2i64: ; AVX: # %bb.0: ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskpd %xmm0, %eax ; AVX-NEXT: testb %al, %al ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v2i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: testb $3, %al ; AVX512-NEXT: setne %al ; AVX512-NEXT: retq %a = icmp ugt <2 x i64> %x, %y %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> %c = or <2 x i1> %a, %b %d = extractelement <2 x i1> %c, i32 0 ret i1 %d } define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { ; SSE-LABEL: bool_reduction_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE-NEXT: movmskps %xmm0, %eax ; SSE-NEXT: xorb $15, %al ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX-LABEL: bool_reduction_v4i32: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax ; AVX-NEXT: xorb $15, %al ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v4i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: testb $15, %al ; AVX512-NEXT: setne %al ; AVX512-NEXT: retq %a = icmp ne <4 x i32> %x, %y %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> %b = or <4 x i1> %s1, %a %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> %c = or <4 x i1> %s2, %b %d = extractelement <4 x i1> %c, i32 0 ret i1 %d } define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) { ; SSE-LABEL: bool_reduction_v8i16: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm0, %xmm1 ; SSE-NEXT: packsswb %xmm0, %xmm1 ; SSE-NEXT: pmovmskb %xmm1, %eax ; SSE-NEXT: testb %al, %al ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX-LABEL: bool_reduction_v8i16: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax ; AVX-NEXT: testb %al, %al ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v8i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: testb %al, %al ; AVX512-NEXT: setne %al ; AVX512-NEXT: retq %a = icmp slt <8 x i16> %x, %y %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> %b = or <8 x i1> %s1, %a %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> %c = or <8 x i1> %s2, %b %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> %d = or <8 x i1> %s3, %c %e = extractelement <8 x i1> %d, i32 0 ret i1 %e } define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) { ; SSE-LABEL: bool_reduction_v16i8: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtb %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: testw %ax, %ax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX-LABEL: bool_reduction_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax ; AVX-NEXT: testw %ax, %ax ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v16i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 ; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: retq %a = icmp sgt <16 x i8> %x, %y %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> %b = or <16 x i1> %s1, %a %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> %c = or <16 x i1> %s2, %b %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> %d = or <16 x i1> %s3, %c %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> %e = or <16 x i1> %s4, %d %f = extractelement <16 x i1> %e, i32 0 ret i1 %f } define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { ; SSE-LABEL: bool_reduction_v4i64: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtq %xmm1, %xmm3 ; SSE-NEXT: pcmpgtq %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 ; SSE-NEXT: movmskps %xmm2, %eax ; SSE-NEXT: testb %al, %al ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskpd %ymm0, %eax ; AVX1-NEXT: testb %al, %al ; AVX1-NEXT: setne %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: bool_reduction_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vmovmskpd %ymm0, %eax ; AVX2-NEXT: testb %al, %al ; AVX2-NEXT: setne %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v4i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: testb $15, %al ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %a = icmp slt <4 x i64> %x, %y %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> %b = or <4 x i1> %s1, %a %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> %c = or <4 x i1> %s2, %b %d = extractelement <4 x i1> %c, i32 0 ret i1 %d } define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { ; SSE-LABEL: bool_reduction_v8i32: ; SSE: # %bb.0: ; SSE-NEXT: pminud %xmm1, %xmm3 ; SSE-NEXT: pcmpeqd %xmm1, %xmm3 ; SSE-NEXT: pminud %xmm0, %xmm2 ; SSE-NEXT: pcmpeqd %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 ; SSE-NEXT: packsswb %xmm0, %xmm2 ; SSE-NEXT: pmovmskb %xmm2, %eax ; SSE-NEXT: testb %al, %al ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: testb %al, %al ; AVX1-NEXT: setne %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: bool_reduction_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vmovmskps %ymm0, %eax ; AVX2-NEXT: testb %al, %al ; AVX2-NEXT: setne %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v8i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: testb %al, %al ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %a = icmp ule <8 x i32> %x, %y %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> %b = or <8 x i1> %s1, %a %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> %c = or <8 x i1> %s2, %b %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> %d = or <8 x i1> %s3, %c %e = extractelement <8 x i1> %d, i32 0 ret i1 %e } define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) { ; SSE-LABEL: bool_reduction_v16i16: ; SSE: # %bb.0: ; SSE-NEXT: pcmpeqw %xmm3, %xmm1 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0 ; SSE-NEXT: packsswb %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: testw %ax, %ax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax ; AVX1-NEXT: testw %ax, %ax ; AVX1-NEXT: setne %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: bool_reduction_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax ; AVX2-NEXT: testw %ax, %ax ; AVX2-NEXT: setne %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v16i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %a = icmp eq <16 x i16> %x, %y %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> %b = or <16 x i1> %s1, %a %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> %c = or <16 x i1> %s2, %b %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> %d = or <16 x i1> %s3, %c %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> %e = or <16 x i1> %s4, %d %f = extractelement <16 x i1> %e, i32 0 ret i1 %f } define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { ; SSE-LABEL: bool_reduction_v32i8: ; SSE: # %bb.0: ; SSE-NEXT: pcmpeqb %xmm3, %xmm1 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax ; SSE-NEXT: testw %ax, %ax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v32i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax ; AVX1-NEXT: testw %ax, %ax ; AVX1-NEXT: setne %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: bool_reduction_v32i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpmovmskb %ymm0, %eax ; AVX2-NEXT: testl %eax, %eax ; AVX2-NEXT: setne %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v32i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ; AVX512-NEXT: kortestd %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %a = icmp eq <32 x i8> %x, %y %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> %b = or <32 x i1> %s1, %a %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> %c = or <32 x i1> %s2, %b %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> %d = or <32 x i1> %s3, %c %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> %e = or <32 x i1> %s4, %d %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> %f = or <32 x i1> %s5, %e %g = extractelement <32 x i1> %f, i32 0 ret i1 %g }