; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -verify-machineinstrs | FileCheck %s

; After tail duplication, two copies in an early exit BB can be cancelled out.
; rdar://10640363
define i32 @t1(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: t1:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: je LBB0_4
; CHECK-NEXT: ## %bb.1: ## %while.body.preheader
; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_2: ## %while.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: jne LBB0_2
; CHECK-NEXT: ## %bb.3: ## %while.end
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: LBB0_4:
; CHECK-NEXT: retq
entry:
  %cmp1 = icmp eq i32 %b, 0
  br i1 %cmp1, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
  %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
  %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
  %rem = srem i32 %a.addr.03, %b.addr.02
  %cmp = icmp eq i32 %rem, 0
  br i1 %cmp, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
  %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
  ret i32 %a.addr.0.lcssa
}

; Two movdqa (from phi-elimination) in the entry BB cancel out.
; rdar://10428165
define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; CHECK-LABEL: t2:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
entry:
  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
  ret <8 x i16> %tmp8
}

define i32 @t3(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: t3:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: je LBB2_4
; CHECK-NEXT: ## %bb.1: ## %while.body.preheader
; CHECK-NEXT: movq %rsi, %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB2_2: ## %while.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: cqto
; CHECK-NEXT: idivq %rcx
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: jne LBB2_2
; CHECK-NEXT: ## %bb.3: ## %while.end
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: LBB2_4:
; CHECK-NEXT: retq
entry:
  %cmp1 = icmp eq i64 %b, 0
  br i1 %cmp1, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
  %a.addr.03 = phi i64 [ %b.addr.02, %while.body ], [ %a, %entry ]
  %b.addr.02 = phi i64 [ %rem, %while.body ], [ %b, %entry ]
  %rem = srem i64 %a.addr.03, %b.addr.02
  %cmp = icmp eq i64 %rem, 0
  br i1 %cmp, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
  %a.addr.0.lcssa = phi i64 [ %a, %entry ], [ %b.addr.02, %while.body ]
  %t = trunc i64 %a.addr.0.lcssa to i32
  ret i32 %t
}

; Check that copy propagation does not kill things like:
; dst = copy src <-- do not kill that.
; ... = op1 undef dst
; ... = op2 dst <-- this is used here.
define <16 x float> @foo(<16 x float> %x) {
; CHECK-LABEL: foo:
; CHECK: ## %bb.0: ## %bb
; CHECK-NEXT: movaps %xmm3, %xmm9
; CHECK-NEXT: movaps %xmm2, %xmm8
; CHECK-NEXT: movaps %xmm0, %xmm7
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movaps %xmm3, %xmm2
; CHECK-NEXT: cmpltps %xmm0, %xmm2
; CHECK-NEXT: movaps %xmm2, %xmm4
; CHECK-NEXT: orps {{.*}}(%rip), %xmm4
; CHECK-NEXT: movaps %xmm4, %xmm10
; CHECK-NEXT: andnps %xmm2, %xmm10
; CHECK-NEXT: movaps %xmm8, %xmm5
; CHECK-NEXT: cmpltps %xmm0, %xmm5
; CHECK-NEXT: movaps {{.*#+}} xmm11 = [9,10,11,12]
; CHECK-NEXT: movaps %xmm5, %xmm2
; CHECK-NEXT: orps %xmm11, %xmm2
; CHECK-NEXT: movaps %xmm2, %xmm14
; CHECK-NEXT: andnps %xmm5, %xmm14
; CHECK-NEXT: cvttps2dq %xmm1, %xmm12
; CHECK-NEXT: cmpltps %xmm0, %xmm1
; CHECK-NEXT: movaps {{.*#+}} xmm13 = [5,6,7,8]
; CHECK-NEXT: movaps %xmm1, %xmm6
; CHECK-NEXT: orps %xmm13, %xmm6
; CHECK-NEXT: movaps %xmm6, %xmm5
; CHECK-NEXT: andnps %xmm1, %xmm5
; CHECK-NEXT: cvttps2dq %xmm7, %xmm3
; CHECK-NEXT: cmpltps %xmm0, %xmm7
; CHECK-NEXT: movaps {{.*#+}} xmm15 = [1,2,3,4]
; CHECK-NEXT: movaps %xmm7, %xmm0
; CHECK-NEXT: orps %xmm15, %xmm0
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: andnps %xmm7, %xmm1
; CHECK-NEXT: andps %xmm15, %xmm0
; CHECK-NEXT: cvtdq2ps %xmm3, %xmm3
; CHECK-NEXT: andps %xmm3, %xmm0
; CHECK-NEXT: movaps {{.*#+}} xmm3 = [1,1,1,1]
; CHECK-NEXT: andps %xmm3, %xmm1
; CHECK-NEXT: orps %xmm1, %xmm0
; CHECK-NEXT: andps %xmm13, %xmm6
; CHECK-NEXT: cvtdq2ps %xmm12, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm6
; CHECK-NEXT: andps %xmm3, %xmm5
; CHECK-NEXT: orps %xmm5, %xmm6
; CHECK-NEXT: andps %xmm11, %xmm2
; CHECK-NEXT: cvttps2dq %xmm8, %xmm1
; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm2
; CHECK-NEXT: andps %xmm3, %xmm14
; CHECK-NEXT: orps %xmm14, %xmm2
; CHECK-NEXT: andps %xmm3, %xmm10
; CHECK-NEXT: andps {{.*}}(%rip), %xmm4
; CHECK-NEXT: cvttps2dq %xmm9, %xmm1
; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm4
; CHECK-NEXT: orps %xmm10, %xmm4
; CHECK-NEXT: movaps %xmm6, %xmm1
; CHECK-NEXT: movaps %xmm4, %xmm3
; CHECK-NEXT: retq
bb:
  %v3 = icmp slt <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, zeroinitializer
  %v14 = zext <16 x i1> %v3 to <16 x i32>
  %v16 = fcmp olt <16 x float> %x, zeroinitializer
  %v17 = sext <16 x i1> %v16 to <16 x i32>
  %v18 = zext <16 x i1> %v16 to <16 x i32>
  %v19 = xor <16 x i32> %v14, %v18
  %v20 = or <16 x i32> %v17, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  %v21 = fptosi <16 x float> %x to <16 x i32>
  %v22 = sitofp <16 x i32> %v21 to <16 x float>
  %v69 = fcmp ogt <16 x float> %v22, zeroinitializer
  %v75 = and <16 x i1> %v69, %v3
  %v77 = bitcast <16 x float> %v22 to <16 x i32>
  %v79 = sext <16 x i1> %v75 to <16 x i32>
  %v80 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v79
  %v81 = xor <16 x i32> %v77, %v80
  %v82 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v81
  %v83 = xor <16 x i32> %v19, %v82
  %v84 = and <16 x i32> %v83, %v20
  %v85 = xor <16 x i32> %v19, %v84
  %v86 = bitcast <16 x i32> %v85 to <16 x float>
  ret <16 x float> %v86
}