diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/binop-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/binop-costs.ll index ff1dee41e62bf..8e4c6d470c9be 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/binop-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/binop-costs.ll @@ -17,12 +17,6 @@ define void @udiv_rhs_opt_cost(ptr %dst) #0 { ; CHECK: Cost of 0 for VF vscale x 2: IR %div = udiv i8 %iv.trunc, 3 ; CHECK: Cost of 5 for VF vscale x 4: CLONE ir<%div> = udiv vp<[[VP7]]>, ir<3> ; CHECK: Cost of 0 for VF vscale x 4: IR %div = udiv i8 %iv.trunc, 3 -; CHECK: LV: Found an estimated cost of 5 for VF 1 For instruction: %div = udiv i8 %iv.trunc, 3 -; CHECK: LV: Found an estimated cost of 5 for VF 2 For instruction: %div = udiv i8 %iv.trunc, 3 -; CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction: %div = udiv i8 %iv.trunc, 3 -; CHECK: LV: Found an estimated cost of 5 for VF vscale x 1 For instruction: %div = udiv i8 %iv.trunc, 3 -; CHECK: LV: Found an estimated cost of 5 for VF vscale x 2 For instruction: %div = udiv i8 %iv.trunc, 3 -; CHECK: LV: Found an estimated cost of 5 for VF vscale x 4 For instruction: %div = udiv i8 %iv.trunc, 3 ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll index a44a16455445c..bcd1c28318450 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll @@ -8,6 +8,7 @@ target triple = "aarch64-unknown-linux-gnu" define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { ; CHECK-COST-LABEL: LV: Checking a loop in 'zext_i8_i16' +; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = zext i8 %0 to i32 ; CHECK-COST: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 ; CHECK-COST: Cost of 1 for VF 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 ; CHECK-COST: Cost of 1 for VF 8: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 @@ -16,7 +17,6 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-COST: Cost of 1 for VF vscale x 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 ; CHECK-COST: Cost of 1 for VF vscale x 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 ; CHECK-COST: Cost of 0 for VF vscale x 8: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 -; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = zext i8 %0 to i32 ; CHECK-LABEL: define void @zext_i8_i16 ; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll index 99139da67bb78..b0738cad80064 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll @@ -84,22 +84,26 @@ define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) { ; CHECK-SCALAR: LV(REG): VF = 1 ; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers -; CHECK-LMUL1: LV(REG): VF = vscale x 2 +; CHECK-LMUL1-LABEL: goo +; CHECK-LMUL1: LV(REG): VF = vscale x 1 ; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers -; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers -; CHECK-LMUL2: LV(REG): VF = vscale x 4 +; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers +; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 1 registers +; CHECK-LMUL2-LABEL: goo +; CHECK-LMUL2: LV(REG): VF = vscale x 2 ; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers -; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers -; CHECK-LMUL4: LV(REG): VF = vscale x 8 +; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers +; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers +; CHECK-LMUL4-LABEL: goo +; CHECK-LMUL4: LV(REG): VF = vscale x 4 ; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers -; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers -; CHECK-LMUL8: LV(REG): VF = vscale x 16 +; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers +; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers +; CHECK-LMUL8-LABEL: goo +; CHECK-LMUL8: LV(REG): VF = vscale x 8 ; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers -; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers +; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers +; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers entry: %cmp3 = icmp sgt i32 %n, 0 br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll index 10d83f4ad125e..fe39700d1787c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll @@ -3,8 +3,8 @@ ; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ ; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s +; CHECK: Cost of 0 for VF vscale x 4: WIDEN-REDUCTION-PHI ir<%rdx> = phi ; CHECK: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir, ir<%add>, ir<%rdx>, vp<%{{.+}}>) -; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %loop ] define i32 @add(ptr %a, i64 %n, i32 %start) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll index d23c2272d9c0d..9f824d1a963eb 100644 --- a/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll @@ -11,17 +11,17 @@ define hidden i32 @i32_mac_s8(ptr nocapture noundef readonly %a, ptr nocapture n ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = sext i8 %1 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i32 %conv2, %conv -; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %0 = load i8, ptr %arrayidx, align 1 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv = sext i8 %0 to i32 -; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %1 = load i8, ptr %arrayidx1, align 1 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv2 = sext i8 %1 to i32 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nsw i32 %conv2, %conv - -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %0 = load i8, ptr %arrayidx, align 1 -; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv = sext i8 %0 to i32 -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i8, ptr %arrayidx1, align 1 -; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv2 = sext i8 %1 to i32 -; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %mul = mul nsw i32 %conv2, %conv +; CHECK: Cost of 3 for VF 2: WIDEN ir<%0> = load +; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%0> to i32 +; CHECK: Cost of 3 for VF 2: WIDEN ir<%1> = load +; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32 +; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv> + +; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load +; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv> = sext ir<%0> to i32 +; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load +; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32 +; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv> ; CHECK: LV: Selecting VF: 4. entry: %cmp7.not = icmp eq i32 %N, 0 @@ -55,17 +55,17 @@ define hidden i32 @i32_mac_s16(ptr nocapture noundef readonly %a, ptr nocapture ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = sext i16 %1 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i32 %conv2, %conv -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv = sext i16 %0 to i32 -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i16, ptr %arrayidx1, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv2 = sext i16 %1 to i32 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nsw i32 %conv2, %conv - -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %0 = load i16, ptr %arrayidx, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv = sext i16 %0 to i32 -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i16, ptr %arrayidx1, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv2 = sext i16 %1 to i32 -; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %mul = mul nsw i32 %conv2, %conv +; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load +; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%0> to i32 +; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load +; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32 +; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv> + +; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load +; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv> = sext ir<%0> to i32 +; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load +; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32 +; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv> ; CHECK: LV: Selecting VF: 4. entry: %cmp7.not = icmp eq i32 %N, 0 @@ -99,11 +99,11 @@ define hidden i64 @i64_mac_s16(ptr nocapture noundef readonly %a, ptr nocapture ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = sext i16 %1 to i64 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i64 %conv2, %conv -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = sext i16 %0 to i64 -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i16, ptr %arrayidx1, align 2 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv2 = sext i16 %1 to i64 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nsw i64 %conv2, %conv +; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load +; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%0> to i64 +; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load +; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv2> = sext ir<%1> to i64 +; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv> ; CHECK: LV: Selecting VF: 2. entry: %cmp7.not = icmp eq i32 %N, 0 @@ -136,10 +136,10 @@ define hidden i64 @i64_mac_s32(ptr nocapture noundef readonly %a, ptr nocapture ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul i32 %1, %0 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv = sext i32 %mul to i64 -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i32, ptr %arrayidx, align 4 -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i32, ptr %arrayidx1, align 4 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul i32 %1, %0 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = sext i32 %mul to i64 +; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load +; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load +; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul ir<%1>, ir<%0> +; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%mul> to i64 ; CHECK: LV: Selecting VF: 2. entry: %cmp6.not = icmp eq i32 %N, 0 @@ -172,17 +172,17 @@ define hidden i32 @i32_mac_u8(ptr nocapture noundef readonly %a, ptr nocapture n ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = zext i8 %1 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nuw nsw i32 %conv2, %conv -; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %0 = load i8, ptr %arrayidx, align 1 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv = zext i8 %0 to i32 -; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %1 = load i8, ptr %arrayidx1, align 1 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv2 = zext i8 %1 to i32 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nuw nsw i32 %conv2, %conv - -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %0 = load i8, ptr %arrayidx, align 1 -; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv = zext i8 %0 to i32 -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i8, ptr %arrayidx1, align 1 -; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv2 = zext i8 %1 to i32 -; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %mul = mul nuw nsw i32 %conv2, %conv +; CHECK: Cost of 3 for VF 2: WIDEN ir<%0> = load +; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i32 +; CHECK: Cost of 3 for VF 2: WIDEN ir<%1> = load +; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32 +; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv> + +; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load +; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i32 +; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load +; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32 +; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv> ; CHECK: LV: Selecting VF: 4. entry: %cmp7.not = icmp eq i32 %N, 0 @@ -216,17 +216,17 @@ define hidden i32 @i32_mac_u16(ptr nocapture noundef readonly %a, ptr nocapture ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = zext i16 %1 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nuw nsw i32 %conv2, %conv -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv = zext i16 %0 to i32 -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i16, ptr %arrayidx1, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv2 = zext i16 %1 to i32 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nuw nsw i32 %conv2, %conv - -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %0 = load i16, ptr %arrayidx, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv = zext i16 %0 to i32 -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i16, ptr %arrayidx1, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv2 = zext i16 %1 to i32 -; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %mul = mul nuw nsw i32 %conv2, %conv +; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load +; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i32 +; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load +; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32 +; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv> + +; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load +; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i32 +; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load +; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32 +; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv> ; CHECK: LV: Selecting VF: 4. entry: %cmp7.not = icmp eq i32 %N, 0 @@ -260,11 +260,11 @@ define hidden i64 @i64_mac_u16(ptr nocapture noundef readonly %a, ptr nocapture ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = zext i16 %1 to i64 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nuw nsw i64 %conv2, %conv -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = zext i16 %0 to i64 -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i16, ptr %arrayidx1, align 2 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv2 = zext i16 %1 to i64 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nuw nsw i64 %conv2, %conv +; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load +; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i64 +; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load +; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv2> = zext ir<%1> to i64 +; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv> ; CHECK: LV: Selecting VF: 2. entry: %cmp8.not = icmp eq i32 %N, 0 @@ -297,10 +297,10 @@ define hidden i64 @i64_mac_u32(ptr nocapture noundef readonly %a, ptr nocapture ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul i32 %1, %0 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv = zext i32 %mul to i64 -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i32, ptr %arrayidx, align 4 -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i32, ptr %arrayidx1, align 4 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul i32 %1, %0 -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = zext i32 %mul to i64 +; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load +; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load +; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul ir<%1>, ir<%0> +; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%mul> to i64 ; CHECK: LV: Selecting VF: 2. entry: %cmp6.not = icmp eq i32 %N, 0 diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll index 54cbab78b1e29..dd380d8ad2818 100644 --- a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll @@ -19,12 +19,22 @@ target triple = "wasm32-unknown-wasi" %struct.FourFloats = type { float, float, float, float } ; CHECK-LABEL: two_ints_same_op -; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at %10 -; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %10 ; CHECK: LV: Scalar loop costs: 12. -; CHECK: LV: Vector loop of width 2 costs: 13. -; CHECK: LV: Vector loop of width 4 costs: 6. -; CHECK: LV: Selecting VF: 4 +; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK: Cost for VF 2: 27 (Estimated cost per lane: 13.5) +; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK: Cost for VF 4: 24 (Estimated cost per lane: 6.0) +; CHECK: LV: Selecting VF: 4. define hidden void @two_ints_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -54,12 +64,22 @@ define hidden void @two_ints_same_op(ptr noalias nocapture noundef writeonly %0, } ; CHECK-LABEL: two_ints_vary_op -; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at %10 -; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %10 ; CHECK: LV: Scalar loop costs: 12. -; CHECK: LV: Vector loop of width 2 costs: 13. -; CHECK: LV: Vector loop of width 4 costs: 6. -; CHECK: LV: Selecting VF: 4 +; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK: Cost for VF 2: 27 (Estimated cost per lane: 13.5) +; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK: Cost for VF 4: 24 (Estimated cost per lane: 6.0) +; CHECK: LV: Selecting VF: 4. define hidden void @two_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -90,15 +110,15 @@ define hidden void @two_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ; CHECK-LABEL: three_ints ; CHECK: LV: Scalar loop costs: 16. -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i32, ptr %9 -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %12 = load i32, ptr %11 -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %25, ptr %26 -; CHECK: LV: Vector loop of width 2 costs: 30. -; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %10 = load i32, ptr %9 -; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %12 = load i32, ptr %11 -; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: store i32 %25, ptr %26 -; CHECK: LV: Vector loop of width 4 costs: 28. -; CHECK: LV: Selecting VF: 1 +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 2: 61 (Estimated cost per lane: 30.5) +; CHECK: Cost of 12 for VF 4: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 12 for VF 4: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 12 for VF 4: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 4: 115 (Estimated cost per lane: 28.8) +; CHECK: LV: Selecting VF: 1. define hidden void @three_ints(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -136,19 +156,19 @@ define hidden void @three_ints(ptr noalias nocapture noundef writeonly %0, ptr n ; CHECK-LABEL: three_shorts ; CHECK: LV: Scalar loop costs: 16. -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %25 -; CHECK: LV: Vector loop of width 2 costs: 30. -; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: store i16 %25 -; CHECK: LV: Vector loop of width 4 costs: 28. -; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: store i16 %25 -; CHECK: LV: Vector loop of width 8 costs: 27. -; CHECK: LV: Selecting VF: 1 +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 2: 61 (Estimated cost per lane: 30.5) +; CHECK: Cost of 12 for VF 4: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 12 for VF 4: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 12 for VF 4: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 4: 115 (Estimated cost per lane: 28.8) +; CHECK: Cost of 24 for VF 8: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 24 for VF 8: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 24 for VF 8: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 8: 223 (Estimated cost per lane: 27.9) +; CHECK: LV: Selecting VF: 1. define hidden void @three_shorts(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -185,24 +205,41 @@ define hidden void @three_shorts(ptr noalias nocapture noundef writeonly %0, ptr } ; CHECK-LABEL: four_shorts_same_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 20. -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: store i16 -; CHECK: LV: Vector loop of width 2 costs: 31. -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i16 -; CHECK: LV: Vector loop of width 4 costs: 15. -; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 -; CHECK: LV: Vector loop of width 8 costs: 26 -; CHECK: LV: Selecting VF: 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 2: 62 (Estimated cost per lane: 31.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5) +; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 8: 212 (Estimated cost per lane: 26.5) +; CHECK: LV: Selecting VF: 4. define hidden void @four_shorts_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -246,23 +283,41 @@ define hidden void @four_shorts_same_op(ptr noalias nocapture noundef writeonly } ; CHECK-LABEL: four_shorts_split_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 20. -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: store i16 -; CHECK: LV: Vector loop of width 2 costs: 31. -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i16 %31 -; CHECK: LV: Vector loop of width 4 costs: 15. -; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 %31 -; CHECK: LV: Vector loop of width 8 costs: 26. -; CHECK: LV: Selecting VF: 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 2: 62 (Estimated cost per lane: 31.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5) +; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 8: 212 (Estimated cost per lane: 26.5) +; CHECK: LV: Selecting VF: 4. define hidden void @four_shorts_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -306,23 +361,41 @@ define hidden void @four_shorts_split_op(ptr noalias nocapture noundef writeonly } ; CHECK-LABEL: four_shorts_interleave_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 20. -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: store i16 -; CHECK: LV: Vector loop of width 2 costs: 31. -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i16 -; CHECK: LV: Vector loop of width 4 costs: 15. -; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %10 = load i16 -; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %12 = load i16 -; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 -; CHECK: LV: Vector loop of width 8 costs: 26. -; CHECK: LV: Selecting VF: 4 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 2: 62 (Estimated cost per lane: 31.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5) +; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 8: 212 (Estimated cost per lane: 26.5) +; CHECK: LV: Selecting VF: 4. define hidden void @four_shorts_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -366,17 +439,51 @@ define hidden void @four_shorts_interleave_op(ptr noalias nocapture noundef writ } ; CHECK-LABEL: five_shorts -; CHECK: Cost of 42 for VF 4: INTERLEAVE-GROUP with factor 5 -; CHECK: Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5 -; CHECK: LV: Found an estimated cost of 84 for VF 8 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 84 for VF 8 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %37 -; CHECK: LV: Vector loop of width 8 costs: 32 -; CHECK: LV: Found an estimated cost of 168 for VF 16 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 168 for VF 16 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %37 -; CHECK: LV: Vector loop of width 16 costs: 32 -; CHECK: LV: Selecting VF: 1 +; CHECK: LV: Scalar loop costs: 24. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 2: 99 (Estimated cost per lane: 49.5) +; CHECK: Cost of 42 for VF 4: INTERLEAVE-GROUP with factor 5 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK: Cost of 42 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK: Cost for VF 4: 135 (Estimated cost per lane: 33.8) +; CHECK: Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK: Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK: Cost for VF 8: 261 (Estimated cost per lane: 32.6) +; CHECK: Cost of 168 for VF 16: INTERLEAVE-GROUP with factor 5 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK: Cost of 168 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK: Cost for VF 16: 513 (Estimated cost per lane: 32.1) +; CHECK: LV: Selecting VF: 1. define hidden void @five_shorts(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -427,26 +534,32 @@ define hidden void @five_shorts(ptr noalias nocapture noundef writeonly %0, ptr } ; CHECK-LABEL: two_bytes_same_op -; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 12. -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %13 = mul i8 -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %13 -; CHECK: LV: Vector loop of width 2 costs: 26. -; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %13 = mul i8 -; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 -; CHECK: LV: Vector loop of width 4 costs: 15. -; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %13 = mul i8 -; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: store i8 -; CHECK: LV: Vector loop of width 8 costs: 4. -; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 4 for VF 16 For instruction: %13 = mul i8 -; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: store i8 -; CHECK: LV: Vector loop of width 16 costs: 1. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 2: 52 (Estimated cost per lane: 26.0) +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK: Cost for VF 4: 61 (Estimated cost per lane: 15.2) +; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK: Cost for VF 8: 33 (Estimated cost per lane: 4.1) +; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK: Cost for VF 16: 30 (Estimated cost per lane: 1.9) ; CHECK: LV: Selecting VF: 16. define hidden void @two_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -477,26 +590,32 @@ define hidden void @two_bytes_same_op(ptr noalias nocapture noundef writeonly %0 } ; CHECK-LABEL: two_bytes_vary_op -; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 12. -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %13 = mul i8 -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %13 -; CHECK: LV: Vector loop of width 2 costs: 23. -; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %13 = mul i8 -; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 -; CHECK: LV: Vector loop of width 4 costs: 12. -; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %13 = mul i8 -; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: store i8 -; CHECK: LV: Vector loop of width 8 costs: 3. -; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 4 for VF 16 For instruction: %13 = mul i8 -; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: store i8 %19 -; CHECK: LV: Vector loop of width 16 costs: 1. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 2: 47 (Estimated cost per lane: 23.5) +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK: Cost for VF 4: 50 (Estimated cost per lane: 12.5) +; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK: Cost for VF 8: 30 (Estimated cost per lane: 3.8) +; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK: Cost for VF 16: 27 (Estimated cost per lane: 1.7) ; CHECK: LV: Selecting VF: 16. define hidden void @two_bytes_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -528,16 +647,22 @@ define hidden void @two_bytes_vary_op(ptr noalias nocapture noundef writeonly %0 ; CHECK-LABEL: three_bytes_same_op ; CHECK: LV: Scalar loop costs: 16. -; CHECK: LV: Vector loop of width 2 costs: 30. -; CHECK: LV: Vector loop of width 4 costs: 28. -; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %10 = load i8, ptr %9 -; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %12 = load i8, ptr %11 -; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: store i8 %25 -; CHECK: LV: Vector loop of width 8 costs: 27. -; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: %10 = load i8, ptr %9 -; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: %12 = load i8, ptr %11 -; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: store i8 %25 -; CHECK: LV: Vector loop of width 16 costs: 27. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 2: 61 (Estimated cost per lane: 30.5) +; CHECK: Cost of 12 for VF 4: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 12 for VF 4: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 12 for VF 4: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 4: 115 (Estimated cost per lane: 28.8) +; CHECK: Cost of 24 for VF 8: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 24 for VF 8: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 24 for VF 8: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 8: 223 (Estimated cost per lane: 27.9) +; CHECK: Cost of 48 for VF 16: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 48 for VF 16: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 48 for VF 16: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 16: 439 (Estimated cost per lane: 27.4) ; CHECK: LV: Selecting VF: 1. define hidden void @three_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -576,16 +701,22 @@ define hidden void @three_bytes_same_op(ptr noalias nocapture noundef writeonly ; CHECK-LABEL: three_bytes_interleave_op ; CHECK: LV: Scalar loop costs: 16. -; CHECK: LV: Vector loop of width 2 costs: 30. -; CHECK: LV: Vector loop of width 4 costs: 28. -; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %10 = load i8, ptr %9 -; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %12 = load i8, ptr %11 -; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: store i8 %25 -; CHECK: LV: Vector loop of width 8 costs: 27. -; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: %10 = load i8, ptr %9 -; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: %12 = load i8, ptr %11 -; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: store i8 %25 -; CHECK: LV: Vector loop of width 16 costs: 27. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 2: 61 (Estimated cost per lane: 30.5) +; CHECK: Cost of 12 for VF 4: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 12 for VF 4: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 12 for VF 4: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 4: 115 (Estimated cost per lane: 28.8) +; CHECK: Cost of 24 for VF 8: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 24 for VF 8: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 24 for VF 8: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 8: 223 (Estimated cost per lane: 27.9) +; CHECK: Cost of 48 for VF 16: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 48 for VF 16: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 48 for VF 16: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 16: 439 (Estimated cost per lane: 27.4) ; CHECK: LV: Selecting VF: 1. define hidden void @three_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -623,23 +754,44 @@ define hidden void @three_bytes_interleave_op(ptr noalias nocapture noundef writ } ; CHECK-LABEL: four_bytes_same_op -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 20. -; CHECK: LV: Vector loop of width 2 costs: 40. -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i8 -; CHECK: LV: Vector loop of width 4 costs: 15. -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: store i8 -; CHECK: LV: Vector loop of width 8 costs: 10. -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: store i8 -; CHECK: LV: Vector loop of width 16 costs: 25. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 2: 80 (Estimated cost per lane: 40.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5) +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 8: 86 (Estimated cost per lane: 10.8) +; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 16: 404 (Estimated cost per lane: 25.2) ; CHECK: LV: Selecting VF: 8. define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -684,27 +836,45 @@ define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly % } ; CHECK-LABEL: four_bytes_split_op -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 20. -; CHECK: LV: Vector loop of width 2 costs: 45. -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %13 = mul i8 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i8 -; CHECK: LV: Vector loop of width 4 costs: 21. -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %13 = mul i8 -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: store i8 -; CHECK: LV: Vector loop of width 8 costs: 11. -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 4 for VF 16 For instruction: %13 = mul i8 -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: store i8 -; CHECK: LV: Vector loop of width 16 costs: 25 -; CHECK: LV: Selecting VF: 8 +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 2: 90 (Estimated cost per lane: 45.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 4: 84 (Estimated cost per lane: 21.0) +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 8: 92 (Estimated cost per lane: 11.5) +; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 16: 410 (Estimated cost per lane: 25.6) +; CHECK: LV: Selecting VF: 8. define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -749,24 +919,45 @@ define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly ; CHECK-LABEL: four_bytes_interleave_op -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 20. -; CHECK: LV: Vector loop of width 2 costs: 40 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i8 -; CHECK: LV: Vector loop of width 4 costs: 15 -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: store i8 -; CHECK: LV: Vector loop of width 8 costs: 10 -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: store i8 -; CHECK: LV: Vector loop of width 16 costs: 25 -; CHECK: LV: Selecting VF: 8 +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11> +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14> +; CHECK: Cost for VF 2: 80 (Estimated cost per lane: 40.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5) +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 8: 86 (Estimated cost per lane: 10.8) +; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK: Cost for VF 16: 404 (Estimated cost per lane: 25.2) +; CHECK: LV: Selecting VF: 8. define hidden void @four_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -811,22 +1002,84 @@ define hidden void @four_bytes_interleave_op(ptr noalias nocapture noundef write ; CHECK-LABEL: eight_bytes_same_op -; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 -; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 -; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 -; CHECK: LV: Found an estimated cost of 66 for VF 4 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 66 for VF 4 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %55 -; CHECK: LV: Vector loop of width 4 costs: 74 -; CHECK: LV: Found an estimated cost of 132 for VF 8 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 132 for VF 8 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %55 -; CHECK: LV: Vector loop of width 8 costs: 54 -; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %55 -; CHECK: LV: Vector loop of width 16 costs: 51 -; CHECK: LV: Selecting VF: 1 +; CHECK: LV: Scalar loop costs: 36. +; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 2: 154 (Estimated cost per lane: 77.0) +; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 4: 298 (Estimated cost per lane: 74.5) +; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 8: 432 (Estimated cost per lane: 54.0) +; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 16: 828 (Estimated cost per lane: 51.8) +; CHECK: LV: Selecting VF: 1. define hidden void @eight_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -898,11 +1151,84 @@ define hidden void @eight_bytes_same_op(ptr noalias nocapture noundef writeonly } ; CHECK-LABEL: eight_bytes_split_op -; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %55 -; CHECK: LV: Vector loop of width 16 costs: 50 -; CHECK: LV: Selecting VF: 1 +; CHECK: LV: Scalar loop costs: 36. +; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 2: 114 (Estimated cost per lane: 57.0) +; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 4: 210 (Estimated cost per lane: 52.5) +; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 8: 408 (Estimated cost per lane: 51.0) +; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 16: 804 (Estimated cost per lane: 50.2) +; CHECK: LV: Selecting VF: 1. define hidden void @eight_bytes_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -974,11 +1300,84 @@ define hidden void @eight_bytes_split_op(ptr noalias nocapture noundef writeonly } ; CHECK-LABEL: eight_bytes_interleave_op -; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %12 = load i8 -; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %55 -; CHECK: LV: Vector loop of width 16 costs: 50 -; CHECK: LV: Selecting VF: 1 +; CHECK: LV: Scalar loop costs: 36. +; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 2: 114 (Estimated cost per lane: 57.0) +; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 4: 210 (Estimated cost per lane: 52.5) +; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 8: 408 (Estimated cost per lane: 51.0) +; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%16> = load from index 1 +; CHECK-NEXT: ir<%22> = load from index 2 +; CHECK-NEXT: ir<%28> = load from index 3 +; CHECK-NEXT: ir<%34> = load from index 4 +; CHECK-NEXT: ir<%40> = load from index 5 +; CHECK-NEXT: ir<%46> = load from index 6 +; CHECK-NEXT: ir<%52> = load from index 7 +; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%14> +; CHECK-NEXT: store ir<%13> to index 0 +; CHECK-NEXT: store ir<%19> to index 1 +; CHECK-NEXT: store ir<%25> to index 2 +; CHECK-NEXT: store ir<%31> to index 3 +; CHECK-NEXT: store ir<%37> to index 4 +; CHECK-NEXT: store ir<%43> to index 5 +; CHECK-NEXT: store ir<%49> to index 6 +; CHECK-NEXT: store ir<%55> to index 7 +; CHECK: Cost for VF 16: 804 (Estimated cost per lane: 50.2) +; CHECK: LV: Selecting VF: 1. define hidden void @eight_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 br i1 %5, label %6, label %7 @@ -1051,14 +1450,32 @@ define hidden void @eight_bytes_interleave_op(ptr noalias nocapture noundef writ ; CHECK-LABEL: four_bytes_into_four_ints_same_op ; CHECK: LV: Scalar loop costs: 28. -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: %17 = load i32 -; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: store i32 -; CHECK: LV: Vector loop of width 2 costs: 44. -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 24 for VF 4 For instruction: %17 = load i32 -; CHECK: LV: Found an estimated cost of 24 for VF 4 For instruction: store i32 -; CHECK: LV: Vector loop of width 4 costs: 26. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%13> = load ir<%12> +; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 4 at %17, ir<%16> +; CHECK-NEXT: ir<%17> = load from index 0 +; CHECK-NEXT: ir<%27> = load from index 1 +; CHECK-NEXT: ir<%37> = load from index 2 +; CHECK-NEXT: ir<%47> = load from index 3 +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%20> = load ir<%19> +; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%16> +; CHECK-NEXT: store ir<%18> to index 0 +; CHECK-NEXT: store ir<%28> to index 1 +; CHECK-NEXT: store ir<%38> to index 2 +; CHECK-NEXT: store ir<%48> to index 3 +; CHECK: Cost for VF 2: 88 (Estimated cost per lane: 44.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%20> = load from index 1 +; CHECK-NEXT: ir<%30> = load from index 2 +; CHECK-NEXT: ir<%40> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %13 +; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%16> +; CHECK-NEXT: store ir<%18> to index 0 +; CHECK-NEXT: store ir<%28> to index 1 +; CHECK-NEXT: store ir<%38> to index 2 +; CHECK-NEXT: store ir<%48> to index 3 +; CHECK: Cost for VF 4: 104 (Estimated cost per lane: 26.0) ; CHECK: LV: Selecting VF: 4. define hidden void @four_bytes_into_four_ints_same_op(ptr noalias nocapture noundef %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -1119,18 +1536,28 @@ define hidden void @four_bytes_into_four_ints_same_op(ptr noalias nocapture noun } ; CHECK-LABEL: four_bytes_into_four_ints_vary_op -; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 21. -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %11 = zext i8 -; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: store i32 -; CHECK: LV: Vector loop of width 2 costs: 35. -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %11 = zext i8 -; CHECK: LV: Found an estimated cost of 24 for VF 4 For instruction: store i32 -; CHECK: LV: Vector loop of width 4 costs: 20. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%13> = load ir<%12> +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%18> = load ir<%17> +; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%16> +; CHECK-NEXT: store ir<%15> to index 0 +; CHECK-NEXT: store ir<%23> to index 1 +; CHECK-NEXT: store ir<%31> to index 2 +; CHECK-NEXT: store ir<%38> to index 3 +; CHECK: Cost for VF 2: 71 (Estimated cost per lane: 35.5) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%18> = load from index 1 +; CHECK-NEXT: ir<%26> = load from index 2 +; CHECK-NEXT: ir<%34> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %13 +; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%16> +; CHECK-NEXT: store ir<%15> to index 0 +; CHECK-NEXT: store ir<%23> to index 1 +; CHECK-NEXT: store ir<%31> to index 2 +; CHECK-NEXT: store ir<%38> to index 3 +; CHECK: Cost for VF 4: 80 (Estimated cost per lane: 20.0) ; CHECK: LV: Selecting VF: 4. define hidden void @four_bytes_into_four_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -1183,10 +1610,27 @@ define hidden void @four_bytes_into_four_ints_vary_op(ptr noalias nocapture noun ; CHECK-LABEL: scale_uv_row_down2 ; CHECK: LV: Scalar loop costs: 10. -; CHECK: LV: Vector loop of width 2 costs: 13. -; CHECK: LV: Vector loop of width 4 costs: 8. -; CHECK: LV: Vector loop of width 8 costs: 4. -; CHECK: LV: Vector loop of width 16 costs: 5. +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %11, ir<%10> +; CHECK-NEXT: ir<%11> = load from index 0 +; CHECK-NEXT: ir<%13> = load from index 1 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at , vp<%next.gep>.1 +; CHECK-NEXT: store ir<%11> to index 0 +; CHECK-NEXT: store ir<%13> to index 1 +; CHECK: Cost for VF 4: 35 (Estimated cost per lane: 8.8) +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %11, ir<%10> +; CHECK-NEXT: ir<%11> = load from index 0 +; CHECK-NEXT: ir<%13> = load from index 1 +; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at , vp<%next.gep>.1 +; CHECK-NEXT: store ir<%11> to index 0 +; CHECK-NEXT: store ir<%13> to index 1 +; CHECK: Cost for VF 8: 39 (Estimated cost per lane: 4.9) +; CHECK: Cost of 68 for VF 16: INTERLEAVE-GROUP with factor 4 at %11, ir<%10> +; CHECK-NEXT: ir<%11> = load from index 0 +; CHECK-NEXT: ir<%13> = load from index 1 +; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at , vp<%next.gep>.1 +; CHECK-NEXT: store ir<%11> to index 0 +; CHECK-NEXT: store ir<%13> to index 1 +; CHECK: Cost for VF 16: 80 (Estimated cost per lane: 5.0) ; CHECK: LV: Selecting VF: 8. define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) { %5 = icmp sgt i32 %3, 0 @@ -1215,19 +1659,41 @@ define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 no ; CHECK-LABEL: scale_uv_row_down2_box ; CHECK: LV: Scalar loop costs: 26. -; CHECK: LV: Vector loop of width 2 costs: 39. -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %14 = load i8 -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %20 = load i8 -; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 %48 -; CHECK: LV: Vector loop of width 4 costs: 18. -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %14 = load i8 -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %20 = load i8 -; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: store i8 %48 -; CHECK: LV: Vector loop of width 8 costs: 11. -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %14 = load i8 -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %20 = load i8 -; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: store i8 %48 -; CHECK: LV: Vector loop of width 16 costs: 20. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%14> = load vp<%next.gep> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%17> = load ir<%16> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%20> = load ir<%19> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%48>, ir<%49> +; CHECK: Cost for VF 2: 78 (Estimated cost per lane: 39.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %14, vp<%next.gep> +; CHECK-NEXT: ir<%14> = load from index 0 +; CHECK-NEXT: ir<%32> = load from index 1 +; CHECK-NEXT: ir<%17> = load from index 2 +; CHECK-NEXT: ir<%35> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %20 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at , vp<%next.gep>.1 +; CHECK-NEXT: store ir<%30> to index 0 +; CHECK-NEXT: store ir<%48> to index 1 +; CHECK: Cost for VF 4: 73 (Estimated cost per lane: 18.2) +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %14, vp<%next.gep> +; CHECK-NEXT: ir<%14> = load from index 0 +; CHECK-NEXT: ir<%32> = load from index 1 +; CHECK-NEXT: ir<%17> = load from index 2 +; CHECK-NEXT: ir<%35> = load from index 3 +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %20 +; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at , vp<%next.gep>.1 +; CHECK-NEXT: store ir<%30> to index 0 +; CHECK-NEXT: store ir<%48> to index 1 +; CHECK: Cost for VF 8: 89 (Estimated cost per lane: 11.1) +; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %14, vp<%next.gep> +; CHECK-NEXT: ir<%14> = load from index 0 +; CHECK-NEXT: ir<%32> = load from index 1 +; CHECK-NEXT: ir<%17> = load from index 2 +; CHECK-NEXT: ir<%35> = load from index 3 +; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %20 +; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at , vp<%next.gep>.1 +; CHECK-NEXT: store ir<%30> to index 0 +; CHECK-NEXT: store ir<%48> to index 1 +; CHECK: Cost for VF 16: 322 (Estimated cost per lane: 20.1) ; CHECK: LV: Selecting VF: 8. define hidden void @scale_uv_row_down2_box(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) { %5 = icmp sgt i32 %3, 0 @@ -1293,18 +1759,38 @@ define hidden void @scale_uv_row_down2_box(ptr nocapture noundef readonly %0, i3 ; CHECK-LABEL: scale_uv_row_down2_linear ; CHECK: LV: Scalar loop costs: 18. -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %13 = load i8 -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 -; CHECK: LV: Vector loop of width 2 costs: 25. -; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 -; CHECK: LV: Vector loop of width 4 costs: 11. -; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %10 = load i8 -; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: store i8 -; CHECK: LV: Vector loop of width 8 costs: 6. -; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %10 = load i8 -; CHECK: LV: Vector loop of width 16 costs: 10. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load vp<%next.gep> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%13> = load ir<%12> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%18>, vp<%next.gep>.1 (!alias.scope {{.*}}, !noalias {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%28>, ir<%29> +; CHECK: Cost for VF 2: 50 (Estimated cost per lane: 25.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, vp<%next.gep> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%20> = load from index 1 +; CHECK-NEXT: ir<%13> = load from index 2 +; CHECK-NEXT: ir<%23> = load from index 3 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at , vp<%next.gep>.1 +; CHECK-NEXT: store ir<%18> to index 0 +; CHECK-NEXT: store ir<%28> to index 1 +; CHECK: Cost for VF 4: 47 (Estimated cost per lane: 11.8) +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, vp<%next.gep> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%20> = load from index 1 +; CHECK-NEXT: ir<%13> = load from index 2 +; CHECK-NEXT: ir<%23> = load from index 3 +; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at , vp<%next.gep>.1 +; CHECK-NEXT: store ir<%18> to index 0 +; CHECK-NEXT: store ir<%28> to index 1 +; CHECK: Cost for VF 8: 55 (Estimated cost per lane: 6.9) +; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %10, vp<%next.gep> +; CHECK-NEXT: ir<%10> = load from index 0 +; CHECK-NEXT: ir<%20> = load from index 1 +; CHECK-NEXT: ir<%13> = load from index 2 +; CHECK-NEXT: ir<%23> = load from index 3 +; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at , vp<%next.gep>.1 +; CHECK-NEXT: store ir<%18> to index 0 +; CHECK-NEXT: store ir<%28> to index 1 +; CHECK: Cost for VF 16: 174 (Estimated cost per lane: 10.9) ; CHECK: LV: Selecting VF: 8. define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) { %5 = icmp sgt i32 %3, 0 @@ -1347,16 +1833,21 @@ define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0, } ; CHECK-LABEL: two_floats_same_op -; CHECK: LV: Scalar loop costs: 14 -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 14. -; CHECK: LV: Vector loop of width 2 costs: 19. -; CHECK: LV: Vector loop of width 4 costs: 15. +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%mul8> to index 1 +; CHECK: Cost for VF 2: 38 (Estimated cost per lane: 19.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%mul8> to index 1 +; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5) ; CHECK: LV: Selecting VF: 1. define hidden void @two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1388,16 +1879,21 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: two_floats_vary_op -; CHECK: LV: Scalar loop costs: 14 -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 14. -; CHECK: LV: Vector loop of width 2 costs: 19. -; CHECK: LV: Vector loop of width 4 costs: 15. +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%add> to index 0 +; CHECK-NEXT: store ir<%sub> to index 1 +; CHECK: Cost for VF 2: 38 (Estimated cost per lane: 19.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%add> to index 0 +; CHECK-NEXT: store ir<%sub> to index 1 +; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5) ; CHECK: LV: Selecting VF: 1. define hidden void @two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1429,13 +1925,22 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: two_bytes_two_floats_same_op -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 27 -; CHECK: LV: Vector loop of width 4 costs: 15 +; CHECK: LV: Scalar loop costs: 18. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%0> = load ir<%arrayidx> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%1> = load ir<%arrayidx1> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%2> = load ir<%y> (!alias.scope {{.*}}) +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%mul11> to index 1 +; CHECK: Cost for VF 2: 54 (Estimated cost per lane: 27.0) +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%mul11> to index 1 +; CHECK: Cost for VF 4: 60 (Estimated cost per lane: 15.0) ; CHECK: LV: Selecting VF: 4. define hidden void @two_bytes_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1471,13 +1976,22 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: two_bytes_two_floats_vary_op -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 27 -; CHECK: LV: Vector loop of width 4 costs: 15 +; CHECK: LV: Scalar loop costs: 18. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%0> = load ir<%arrayidx> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%1> = load ir<%arrayidx1> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%2> = load ir<%y> (!alias.scope {{.*}}) +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%add> to index 0 +; CHECK-NEXT: store ir<%sub> to index 1 +; CHECK: Cost for VF 2: 54 (Estimated cost per lane: 27.0) +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%add> to index 0 +; CHECK-NEXT: store ir<%sub> to index 1 +; CHECK: Cost for VF 4: 60 (Estimated cost per lane: 15.0) ; CHECK: LV: Selecting VF: 4. define hidden void @two_bytes_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1513,13 +2027,21 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: two_floats_two_bytes_same_op -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 26 -; CHECK: LV: Vector loop of width 4 costs: 16. +; CHECK: LV: Scalar loop costs: 16. +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv>, ir<%arrayidx3> (!alias.scope {{.*}}, !noalias {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv9>, ir<%y11> (!alias.scope {{.*}}, !noalias {{.*}}) +; CHECK: Cost for VF 2: 52 (Estimated cost per lane: 26.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv9> to index 1 +; CHECK: Cost for VF 4: 67 (Estimated cost per lane: 16.8) ; CHECK: LV: Selecting VF: 1. define hidden void @two_floats_two_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1553,13 +2075,21 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: two_floats_two_bytes_vary_op -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 26 -; CHECK: LV: Vector loop of width 4 costs: 16. +; CHECK: LV: Scalar loop costs: 16. +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv>, ir<%arrayidx3> (!alias.scope {{.*}}, !noalias {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv8>, ir<%y10> (!alias.scope {{.*}}, !noalias {{.*}}) +; CHECK: Cost for VF 2: 52 (Estimated cost per lane: 26.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv8> to index 1 +; CHECK: Cost for VF 4: 67 (Estimated cost per lane: 16.8) ; CHECK: LV: Selecting VF: 1. define hidden void @two_floats_two_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1593,15 +2123,23 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: two_shorts_two_floats_same_op -; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 24 -; CHECK: LV: Vector loop of width 4 costs: 12 +; CHECK: LV: Scalar loop costs: 18. +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%mul11> to index 1 +; CHECK: Cost for VF 2: 48 (Estimated cost per lane: 24.0) +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%mul11> to index 1 +; CHECK: Cost for VF 4: 48 (Estimated cost per lane: 12.0) ; CHECK: LV: Selecting VF: 4. define hidden void @two_shorts_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1637,15 +2175,23 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: two_shorts_two_floats_vary_op -; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 24 -; CHECK: LV: Vector loop of width 4 costs: 12 +; CHECK: LV: Scalar loop costs: 18. +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%add> to index 0 +; CHECK-NEXT: store ir<%sub> to index 1 +; CHECK: Cost for VF 2: 48 (Estimated cost per lane: 24.0) +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%add> to index 0 +; CHECK-NEXT: store ir<%sub> to index 1 +; CHECK: Cost for VF 4: 48 (Estimated cost per lane: 12.0) ; CHECK: LV: Selecting VF: 4. define hidden void @two_shorts_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1681,16 +2227,24 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: two_floats_two_shorts_same_op -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 23 -; CHECK: LV: Vector loop of width 4 costs: 14 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Scalar loop costs: 16. +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv9> to index 1 +; CHECK: Cost for VF 2: 47 (Estimated cost per lane: 23.5) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv9> to index 1 +; CHECK: Cost for VF 4: 59 (Estimated cost per lane: 14.8) +; CHECK: LV: Selecting VF: 4. define hidden void @two_floats_two_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp22.not = icmp eq i32 %N, 0 @@ -1723,16 +2277,24 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: two_floats_two_shorts_vary_op -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 -; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 23 -; CHECK: LV: Vector loop of width 4 costs: 14 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Scalar loop costs: 16. +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv8> to index 1 +; CHECK: Cost for VF 2: 47 (Estimated cost per lane: 23.5) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %1 +; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv8> to index 1 +; CHECK: Cost for VF 4: 59 (Estimated cost per lane: 14.8) +; CHECK: LV: Selecting VF: 4. define hidden void @two_floats_two_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp21.not = icmp eq i32 %N, 0 @@ -1765,13 +2327,20 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_floats_same_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: LV: Scalar loop costs: 24 -; CHECK: LV: Vector loop of width 2 costs: 33 -; CHECK: LV: Vector loop of width 4 costs: 30 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Scalar loop costs: 24. +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%mul8> to index 1 +; CHECK-NEXT: store ir<%mul14> to index 2 +; CHECK-NEXT: store ir<%mul20> to index 3 +; CHECK: Cost for VF 2: 66 (Estimated cost per lane: 33.0) +; CHECK: Cost for VF 4: 12 (Estimated cost per lane: 3.0) +; CHECK: LV: Selecting VF: 4. define hidden void @four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp45.not = icmp eq i32 %N, 0 @@ -1816,16 +2385,30 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_floats_vary_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: LV: Scalar loop costs: 24 -; CHECK: LV: Vector loop of width 2 costs: 33 -; CHECK: LV: Vector loop of width 4 costs: 30 -; CHECK: LV: Selecting VF: 1 +; CHECK: LV: Scalar loop costs: 24. +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%add> to index 0 +; CHECK-NEXT: store ir<%sub> to index 1 +; CHECK-NEXT: store ir<%mul> to index 2 +; CHECK-NEXT: store ir<%div> to index 3 +; CHECK: Cost for VF 2: 66 (Estimated cost per lane: 33.0) +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%add> to index 0 +; CHECK-NEXT: store ir<%sub> to index 1 +; CHECK-NEXT: store ir<%mul> to index 2 +; CHECK-NEXT: store ir<%div> to index 3 +; CHECK: Cost for VF 4: 120 (Estimated cost per lane: 30.0) +; CHECK: LV: Selecting VF: 1. define hidden void @four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp42.not = icmp eq i32 %N, 0 @@ -1870,14 +2453,29 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_bytes_four_floats_same_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 51 -; CHECK: LV: Vector loop of width 4 costs: 27 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Scalar loop costs: 32. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%0> = load ir<%arrayidx> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%1> = load ir<%arrayidx1> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%2> = load ir<%y> (!alias.scope {{.*}}) +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%mul11> to index 1 +; CHECK-NEXT: store ir<%mul19> to index 2 +; CHECK-NEXT: store ir<%mul27> to index 3 +; CHECK: Cost for VF 2: 102 (Estimated cost per lane: 51.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%mul11> to index 1 +; CHECK-NEXT: store ir<%mul19> to index 2 +; CHECK-NEXT: store ir<%mul27> to index 3 +; CHECK: Cost for VF 4: 108 (Estimated cost per lane: 27.0) +; CHECK: LV: Selecting VF: 4. define hidden void @four_bytes_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp52.not = icmp eq i32 %N, 0 @@ -1930,14 +2528,29 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_bytes_four_floats_vary_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 51 -; CHECK: LV: Vector loop of width 4 costs: 27 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Scalar loop costs: 32. +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%0> = load ir<%arrayidx> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%1> = load ir<%arrayidx1> (!alias.scope {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE ir<%2> = load ir<%y> (!alias.scope {{.*}}) +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%add> to index 1 +; CHECK-NEXT: store ir<%div> to index 2 +; CHECK-NEXT: store ir<%sub> to index 3 +; CHECK: Cost for VF 2: 102 (Estimated cost per lane: 51.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%add> to index 1 +; CHECK-NEXT: store ir<%div> to index 2 +; CHECK-NEXT: store ir<%sub> to index 3 +; CHECK: Cost for VF 4: 108 (Estimated cost per lane: 27.0) +; CHECK: LV: Selecting VF: 4. define hidden void @four_bytes_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp49.not = icmp eq i32 %N, 0 @@ -1990,15 +2603,29 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_floats_four_bytes_same_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 48 -; CHECK: LV: Vector loop of width 4 costs: 31 -; CHECK: LV: Selecting VF: 1 +; CHECK: LV: Scalar loop costs: 28. +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv>, ir<%arrayidx3> (!alias.scope {{.*}}, !noalias {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv9>, ir<%y11> (!alias.scope {{.*}}, !noalias {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv16>, ir<%z18> (!alias.scope {{.*}}, !noalias {{.*}}) +; CHECK: Cost for VF 2: 96 (Estimated cost per lane: 48.0) +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv9> to index 1 +; CHECK-NEXT: store ir<%conv16> to index 2 +; CHECK-NEXT: store ir<%conv23> to index 3 +; CHECK: Cost for VF 4: 126 (Estimated cost per lane: 31.5) +; CHECK: LV: Selecting VF: 1. define hidden void @four_floats_four_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp48.not = icmp eq i32 %N, 0 @@ -2047,15 +2674,29 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_floats_four_bytes_vary_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 48 -; CHECK: LV: Vector loop of width 4 costs: 31 -; CHECK: LV: Selecting VF: 1 +; CHECK: LV: Scalar loop costs: 28. +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv>, ir<%arrayidx3> (!alias.scope {{.*}}, !noalias {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv8>, ir<%y10> (!alias.scope {{.*}}, !noalias {{.*}}) +; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv14>, ir<%z16> (!alias.scope {{.*}}, !noalias {{.*}}) +; CHECK: Cost for VF 2: 96 (Estimated cost per lane: 48.0) +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv8> to index 1 +; CHECK-NEXT: store ir<%conv14> to index 2 +; CHECK-NEXT: store ir<%conv20> to index 3 +; CHECK: Cost for VF 4: 126 (Estimated cost per lane: 31.5) +; CHECK: LV: Selecting VF: 1. define hidden void @four_floats_four_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp45.not = icmp eq i32 %N, 0 @@ -2104,16 +2745,32 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_shorts_four_floats_same_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 41 -; CHECK: LV: Vector loop of width 4 costs: 25 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Scalar loop costs: 32. +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%mul11> to index 1 +; CHECK-NEXT: store ir<%mul19> to index 2 +; CHECK-NEXT: store ir<%mul27> to index 3 +; CHECK: Cost for VF 2: 82 (Estimated cost per lane: 41.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%mul11> to index 1 +; CHECK-NEXT: store ir<%mul19> to index 2 +; CHECK-NEXT: store ir<%mul27> to index 3 +; CHECK: Cost for VF 4: 100 (Estimated cost per lane: 25.0) +; CHECK: LV: Selecting VF: 4. define hidden void @four_shorts_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp52.not = icmp eq i32 %N, 0 @@ -2166,16 +2823,32 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_shorts_four_floats_vary_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 41 -; CHECK: LV: Vector loop of width 4 costs: 25 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Scalar loop costs: 32. +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%add> to index 1 +; CHECK-NEXT: store ir<%div> to index 2 +; CHECK-NEXT: store ir<%sub> to index 3 +; CHECK: Cost for VF 2: 82 (Estimated cost per lane: 41.0) +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx4> +; CHECK-NEXT: store ir<%mul> to index 0 +; CHECK-NEXT: store ir<%add> to index 1 +; CHECK-NEXT: store ir<%div> to index 2 +; CHECK-NEXT: store ir<%sub> to index 3 +; CHECK: Cost for VF 4: 100 (Estimated cost per lane: 25.0) +; CHECK: LV: Selecting VF: 4. define hidden void @four_shorts_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp49.not = icmp eq i32 %N, 0 @@ -2228,16 +2901,32 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_floats_four_shorts_same_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 41 -; CHECK: LV: Vector loop of width 4 costs: 29 -; CHECK: LV: Selecting VF: 1 +; CHECK: LV: Scalar loop costs: 28. +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv9> to index 1 +; CHECK-NEXT: store ir<%conv16> to index 2 +; CHECK-NEXT: store ir<%conv23> to index 3 +; CHECK: Cost for VF 2: 82 (Estimated cost per lane: 41.0) +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv9> to index 1 +; CHECK-NEXT: store ir<%conv16> to index 2 +; CHECK-NEXT: store ir<%conv23> to index 3 +; CHECK: Cost for VF 4: 118 (Estimated cost per lane: 29.5) +; CHECK: LV: Selecting VF: 1. define hidden void @four_floats_four_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp48.not = icmp eq i32 %N, 0 @@ -2286,16 +2975,32 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_floats_four_shorts_vary_op -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 41 -; CHECK: LV: Vector loop of width 4 costs: 29 -; CHECK: LV: Selecting VF: 1 +; CHECK: LV: Scalar loop costs: 28. +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv8> to index 1 +; CHECK-NEXT: store ir<%conv14> to index 2 +; CHECK-NEXT: store ir<%conv20> to index 3 +; CHECK: Cost for VF 2: 82 (Estimated cost per lane: 41.0) +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx> +; CHECK-NEXT: ir<%0> = load from index 0 +; CHECK-NEXT: ir<%2> = load from index 1 +; CHECK-NEXT: ir<%4> = load from index 2 +; CHECK-NEXT: ir<%6> = load from index 3 +; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %1 +; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx3> +; CHECK-NEXT: store ir<%conv> to index 0 +; CHECK-NEXT: store ir<%conv8> to index 1 +; CHECK-NEXT: store ir<%conv14> to index 2 +; CHECK-NEXT: store ir<%conv20> to index 3 +; CHECK: Cost for VF 4: 118 (Estimated cost per lane: 29.5) +; CHECK: LV: Selecting VF: 1. define hidden void @four_floats_four_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp45.not = icmp eq i32 %N, 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/handle-iptr-with-data-layout-to-not-assert.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/handle-iptr-with-data-layout-to-not-assert.ll index d0d414a869636..eb87b420ff654 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/handle-iptr-with-data-layout-to-not-assert.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/handle-iptr-with-data-layout-to-not-assert.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9] for VF [0-9] For instruction:\s*store ptr %[0-9], ptr %__last" --filter "LV: Found an estimated cost of [0-9] for VF [0-9] For instruction:\s*store ptr %[0-9]" --version 5 +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store ptr" --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at" --version 5 ; REQUIRES: asserts ; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s target triple = "x86_64-unknown-linux-gnu" @@ -6,9 +6,12 @@ target triple = "x86_64-unknown-linux-gnu" define ptr @foo(ptr %__first, ptr %__last) #0 { ; CHECK-LABEL: 'foo' ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store ptr %0, ptr %__last, align 8 -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: store ptr %0, ptr %__last, align 8 -; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: store ptr %0, ptr %__last, align 8 -; CHECK: LV: Found an estimated cost of 3 for VF 8 For instruction: store ptr %0, ptr %__last, align 8 +; CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, vp<%next.gep> +; CHECK: ir<%0> = load from index 0 +; CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, vp<%next.gep> +; CHECK: ir<%0> = load from index 0 +; CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %0, vp<%next.gep> +; CHECK: ir<%0> = load from index 0 ; entry: %cmp.not1 = icmp eq ptr %__first, %__last diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll index 4fcbdcecc4790..5c43db13f12ca 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,39 +14,65 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; SSE2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; SSE2: ir<%v0> = load from index 0 +; SSE2: ir<%v1> = load from index 1 +; SSE2: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; SSE2: ir<%v0> = load from index 0 +; SSE2: ir<%v1> = load from index 1 +; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX1: ir<%v0> = load from index 0 +; AVX1: ir<%v1> = load from index 1 +; AVX1: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX1: ir<%v0> = load from index 0 +; AVX1: ir<%v1> = load from index 1 +; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 24 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 22 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 92 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll index a6f190bd5eb96..a90795399544f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,39 +14,68 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 10 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 20 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 51 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 210 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 51 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 210 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll index 80a9ad64b8626..9fb4c01abe813 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,35 +14,74 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 84 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 92 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 92 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll index d8aadbe04b72f..0be2dc2c43504 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,136 +14,59 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 35 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 145 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4 +; AVX512: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 145 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll index c51d8e0a89298..6b64df947f103 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,36 +14,88 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 37 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 76 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 37 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 76 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 210 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX512: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 210 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll index d8dc847535154..9f044950e99f0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,179 +14,61 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4 +; AVX512: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll index 068d18badbb98..9c0cbfd45d415 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,187 +14,73 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v7 = load float, ptr %in7, align 4 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v7 = load float, ptr %in7, align 4 +; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: ir<%v6> = load from index 6 +; AVX2: ir<%v7> = load from index 7 +; AVX2: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX512: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX512: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load float, ptr %in7, align 4 -; AVX512: LV: Found an estimated cost of 92 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4 +; AVX512: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 +; AVX512: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 +; AVX512: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 +; AVX512: Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll index b796772f6c580..b3cc8811542f5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,67 +14,55 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 +; SSE2: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; SSE2: ir<%v0> = load from index 0 +; SSE2: ir<%v1> = load from index 1 +; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX1: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX1: ir<%v0> = load from index 0 +; AVX1: ir<%v1> = load from index 1 +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load double, ptr %in1, align 8 +; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll index 868a9c2ab62cf..c847f70e41b15 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,86 +14,57 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 32 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 +; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll index 5cbad3d97019c..d88c58571c773 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,99 +14,60 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 +; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll index d5d6be704b757..f0d72a118d2f2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,126 +14,57 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8 +; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8 +; AVX512: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll index dba4cb412e307..d43a3b3746783 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,149 +14,80 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8 +; AVX2: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 51 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8 +; AVX512: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll index a5a6a1e155d50..0d3f3b708ce08 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,165 +14,67 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v6 = load double, ptr %in6, align 8 +; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8 +; AVX512: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 35 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll index 63b93ca66e184..f739aadf7e138 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,171 +14,79 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8 +; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8 +; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v7 = load double, ptr %in7, align 8 +; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v7 = load double, ptr %in7, align 8 +; AVX512: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 +; AVX512: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v6> = load ir<%in6> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v7> = load ir<%in7> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v7> = load ir<%in7> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v7> = load ir<%in7> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v6> = load ir<%in6> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v7> = load ir<%in7> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-half.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-half.ll index ab767533f6e0d..796b34fd7c25b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-half.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-half.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF 32: INTERLEAVE-GROUP with factor [0-9]+ at %0," --version 5 ; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize -mattr=avx512fp16 %s 2>&1 | FileCheck %s ; REQUIRES: asserts target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" @@ -9,9 +9,11 @@ target triple = "i386-unknown-linux-gnu" ; Function Attrs: norecurse nounwind define void @stride8(half %k, i32 %width_) { +; CHECK-LABEL: 'stride8' +; CHECK: Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at %0, ir<%arrayidx> +; entry: -; CHECK: Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at %0, ir<%arrayidx> %cmp72 = icmp sgt i32 %width_, 0 br i1 %cmp72, label %for.body.lr.ph, label %for.cond.cleanup @@ -97,9 +99,11 @@ for.body: ; preds = %for.body.lr.ph, %fo ; Function Attrs: norecurse nounwind define void @stride3(half %k, i32 %width_) { +; CHECK-LABEL: 'stride3' +; CHECK: Cost of 18 for VF 32: INTERLEAVE-GROUP with factor 3 at %0, ir<%arrayidx> +; entry: -; CHECK: LV: Found an estimated cost of 18 for VF 32 For instruction: %0 = load half, ptr %arrayidx, align 4 %cmp27 = icmp sgt i32 %width_, 0 br i1 %cmp27, label %for.body.lr.ph, label %for.cond.cleanup diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll index 19f2d23a22afc..d589c446f5330 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,49 +15,86 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; SSE2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; SSE2: ir<%v0> = load from index 0 +; SSE2: ir<%v1> = load from index 1 +; SSE2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; SSE2: ir<%v0> = load from index 0 +; SSE2: ir<%v1> = load from index 1 +; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX1: ir<%v0> = load from index 0 +; AVX1: ir<%v1> = load from index 1 +; AVX1: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX1: ir<%v0> = load from index 0 +; AVX1: ir<%v1> = load from index 1 +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 11 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 11 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 22 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 10 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 20 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 284 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: Cost of 284 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 7 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 34 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512BW: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: Cost of 7 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: Cost of 34 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll index f87927cffc3c9..222dac04dc019 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,49 +15,95 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 11 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 31 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 62 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 31 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 62 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 59 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 426 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: Cost of 30 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: Cost of 59 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: Cost of 426 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 9 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 18 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 81 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512BW: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: Cost of 18 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: Cost of 81 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll index f2e102e434d9d..5d822e5824e0f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,49 +15,112 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 79 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 158 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 79 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 158 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 77 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 154 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 568 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: Cost of 34 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: Cost of 77 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: Cost of 154 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: Cost of 568 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 34 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 148 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512BW: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: Cost of 34 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: Cost of 148 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll index 0124a86a1548c..3cdc803c73098 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,193 +15,104 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 +; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 175 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 355 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 710 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2 +; AVX512DQ: Cost of 25 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: Cost of 45 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: Cost of 85 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: Cost of 175 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: Cost of 355 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: Cost of 710 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 55 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 235 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2 +; AVX512BW: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: Cost of 28 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: Cost of 55 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: Cost of 235 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll index 3417008ba5f9a..1bfe9f6d31769 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,49 +15,146 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 112 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 224 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 42 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 224 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 109 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 218 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 852 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: Cost of 41 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: Cost of 109 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: Cost of 218 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: Cost of 852 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 13 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 13 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 81 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 342 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512BW: Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: Cost of 13 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: Cost of 17 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: Cost of 33 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: Cost of 81 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: Cost of 342 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll index 392a677c95e3e..3b313a83c05ce 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,265 +15,128 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2 -; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2 +; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 121 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 245 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 497 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 994 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i16, ptr %in6, align 2 +; AVX512DQ: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: Cost of 64 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: Cost of 121 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: Cost of 245 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: Cost of 497 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: Cost of 994 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 15 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 19 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 56 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 112 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 469 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i16, ptr %in6, align 2 +; AVX512BW: Cost of 15 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: Cost of 15 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: Cost of 19 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: Cost of 112 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: Cost of 469 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll index cbe9fc4b579b2..281639de7a6ab 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,269 +15,140 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2 -; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2 +; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512DQ: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512DQ: LV: Found an estimated cost of 136 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512DQ: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512DQ: LV: Found an estimated cost of 568 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2 +; AVX512DQ: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 +; AVX512DQ: Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 +; AVX512DQ: Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 +; AVX512DQ: Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 +; AVX512DQ: Cost of 568 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 +; AVX512DQ: Cost of 1136 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512BW: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512BW: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512BW: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512BW: LV: Found an estimated cost of 64 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2 -; AVX512BW: LV: Found an estimated cost of 148 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2 +; AVX512BW: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 +; AVX512BW: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 +; AVX512BW: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 +; AVX512BW: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 +; AVX512BW: Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 +; AVX512BW: Cost of 616 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll index 2f01b40f1807a..ce92fccd3772e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,39 +14,50 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; SSE2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; SSE2: ir<%v0> = load from index 0 +; SSE2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; SSE2: ir<%v0> = load from index 0 +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX1: ir<%v0> = load from index 0 +; AVX1: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX1: ir<%v0> = load from index 0 +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 2 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 13 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 50 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX512: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 1 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 13 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 50 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll index 0f7fb04348cda..7c11092830f9f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,39 +14,65 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; SSE2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; SSE2: ir<%v0> = load from index 0 +; SSE2: ir<%v1> = load from index 1 +; SSE2: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; SSE2: ir<%v0> = load from index 0 +; SSE2: ir<%v1> = load from index 1 +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX1: ir<%v0> = load from index 0 +; AVX1: ir<%v1> = load from index 1 +; AVX1: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX1: ir<%v0> = load from index 0 +; AVX1: ir<%v1> = load from index 1 +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 24 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 22 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 92 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll index 2a5d8860e005f..90915502dba2f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,39 +14,57 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 34 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 9 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 144 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 36 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 144 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll index e1fe312f9b22b..f036e897ad20c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,39 +14,45 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 11 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 23 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: Cost of 11 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: Cost of 23 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 3 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 21 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 78 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX512: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 21 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 78 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll index cee1dc84445b5..6d02b6504333d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,39 +14,68 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 10 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 20 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 51 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 210 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 51 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 210 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll index 0f1e265a5c7de..bfbd85555db00 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,38 +14,64 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 67 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 32 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 67 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 17 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 71 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 71 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll index 269bb2ee98b76..96e9a7a710fae 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,67 +14,54 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 50 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i32, ptr %in1, align 4 +; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 50 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll index 2a6c6407d9eff..0bdefbcb269bf 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,39 +14,45 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 33 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: Cost of 33 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 29 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX512: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 29 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll index 5289cfb6838dd..8fb210b0b2dbf 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,35 +14,74 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 84 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 92 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 92 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll index 0b2f091221319..5b2f2d522563a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,136 +14,59 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 35 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 145 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4 +; AVX512: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 145 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll index cf9b4799b2a2f..e0674c038ecf4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,36 +14,88 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 37 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 76 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 37 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 76 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 210 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX512: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 210 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll index 91bb334817c2d..754be0e80299f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,179 +14,61 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4 +; AVX512: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll index 1c9d2edf0ef93..2ab06d6f82b9c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,187 +14,73 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v7 = load i32, ptr %in7, align 4 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v7 = load i32, ptr %in7, align 4 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: ir<%v6> = load from index 6 +; AVX2: ir<%v7> = load from index 7 +; AVX2: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX512: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX512: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i32, ptr %in7, align 4 -; AVX512: LV: Found an estimated cost of 92 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4 +; AVX512: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 +; AVX512: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 +; AVX512: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 +; AVX512: Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll index 9a23a5923fd7e..52276bce225eb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,67 +14,59 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 +; SSE2: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; SSE2: ir<%v0> = load from index 0 +; SSE2: ir<%v1> = load from index 1 +; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX1: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX1: ir<%v0> = load from index 0 +; AVX1: ir<%v1> = load from index 1 +; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i64, ptr %in1, align 8 +; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll index d8c64e3c17357..e842981174205 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,86 +14,57 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 36 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 32 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 +; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll index 072220fb67e2a..328d0d6f8cef8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,99 +14,72 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 +; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll index d1a51ae3f779d..e534038b2e795 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,116 +14,62 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8 +; AVX512: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll index 16e2ad92a0565..8647841feeaab 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,137 +14,86 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX2: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 51 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8 +; AVX512: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll index 73429492d47e8..972ebc51fdeec 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,165 +14,74 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8 +; AVX512: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 35 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v6> = load ir<%in6> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll index 6656fc58548f4..e166fd5296d4b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,171 +14,79 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v7 = load i64, ptr %in7, align 8 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v7 = load i64, ptr %in7, align 8 +; AVX512: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 +; AVX512: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512: ir<%v0> = load from index 0 +; AVX512: ir<%v1> = load from index 1 +; AVX512: ir<%v2> = load from index 2 +; AVX512: ir<%v3> = load from index 3 +; AVX512: ir<%v4> = load from index 4 +; AVX512: ir<%v5> = load from index 5 +; AVX512: ir<%v6> = load from index 6 +; AVX512: ir<%v7> = load from index 7 +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v6> = load ir<%in6> +; AVX512: Cost of 10 for VF 8: WIDEN ir<%v7> = load ir<%in7> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6> +; AVX512: Cost of 20 for VF 16: WIDEN ir<%v7> = load ir<%in7> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6> +; AVX512: Cost of 40 for VF 32: WIDEN ir<%v7> = load ir<%in7> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v6> = load ir<%in6> +; AVX512: Cost of 80 for VF 64: WIDEN ir<%v7> = load ir<%in7> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll index b684bac7373a7..eafd91b4bf950 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,49 +15,78 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 7 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 270 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: Cost of 7 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: Cost of 270 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 9 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 17 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 41 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512BW: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: Cost of 17 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: Cost of 41 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll index 9ba44e2564d13..061f27cd8caee 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,49 +15,95 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 17 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: Cost of 17 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 14 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 405 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: Cost of 14 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: Cost of 405 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 25 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512BW: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: Cost of 13 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: Cost of 25 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll index 7e79d7e3fb716..3e15690abe167 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,49 +15,112 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 26 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 13 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 26 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 25 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 58 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 540 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: Cost of 13 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: Cost of 25 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: Cost of 58 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: Cost of 540 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 80 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 238 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512BW: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: Cost of 17 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: Cost of 33 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: Cost of 80 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: Cost of 238 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll index cce8887ad2447..2778841dbd3a1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,193 +15,104 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 335 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 675 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX512DQ: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: Cost of 45 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: Cost of 85 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: Cost of 165 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: Cost of 335 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: Cost of 675 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 99 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 198 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 395 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1 +; AVX512BW: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: Cost of 41 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: Cost of 99 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: Cost of 198 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: Cost of 395 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll index 259fb8d2c430f..281628dd5966d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,229 +15,146 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 46 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 88 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX2: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 46 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 +; AVX2: Cost of 88 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX2: ir<%v0> = load from index 0 +; AVX2: ir<%v1> = load from index 1 +; AVX2: ir<%v2> = load from index 2 +; AVX2: ir<%v3> = load from index 3 +; AVX2: ir<%v4> = load from index 4 +; AVX2: ir<%v5> = load from index 5 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 45 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 85 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 810 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX512DQ: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: Cost of 45 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: Cost of 85 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: Cost of 810 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 25 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 49 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 119 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 237 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 591 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1 +; AVX512BW: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: Cost of 25 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: Cost of 49 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: Cost of 119 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: Cost of 237 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: Cost of 591 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll index e8a2637b9f5b4..efc6704e4785a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,265 +15,128 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 233 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 469 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 945 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX512DQ: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: Cost of 62 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: Cost of 233 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: Cost of 469 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: Cost of 945 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 29 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 57 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 138 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 413 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 826 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1 +; AVX512BW: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: Cost of 29 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: Cost of 57 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: Cost of 138 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: Cost of 413 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: Cost of 826 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll index da19a9a9faed2..dc06af6263551 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -15,301 +15,140 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 -; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1 +; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1 +; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1 +; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0> +; AVX2: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0> ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512DQ: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512DQ: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512DQ: LV: Found an estimated cost of 536 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512DQ: LV: Found an estimated cost of 1080 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v7 = load i8, ptr %in7, align 1 +; AVX512DQ: Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 +; AVX512DQ: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 +; AVX512DQ: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 +; AVX512DQ: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 +; AVX512DQ: Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 +; AVX512DQ: Cost of 1080 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512DQ: ir<%v0> = load from index 0 +; AVX512DQ: ir<%v1> = load from index 1 +; AVX512DQ: ir<%v2> = load from index 2 +; AVX512DQ: ir<%v3> = load from index 3 +; AVX512DQ: ir<%v4> = load from index 4 +; AVX512DQ: ir<%v5> = load from index 5 +; AVX512DQ: ir<%v6> = load from index 6 +; AVX512DQ: ir<%v7> = load from index 7 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512BW: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512BW: LV: Found an estimated cost of 33 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512BW: LV: Found an estimated cost of 65 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512BW: LV: Found an estimated cost of 158 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512BW: LV: Found an estimated cost of 472 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1 -; AVX512BW: LV: Found an estimated cost of 1100 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1 -; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v7 = load i8, ptr %in7, align 1 +; AVX512BW: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 +; AVX512BW: Cost of 33 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 +; AVX512BW: Cost of 65 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 +; AVX512BW: Cost of 158 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 +; AVX512BW: Cost of 472 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 +; AVX512BW: Cost of 1100 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0> +; AVX512BW: ir<%v0> = load from index 0 +; AVX512BW: ir<%v1> = load from index 1 +; AVX512BW: ir<%v2> = load from index 2 +; AVX512BW: ir<%v3> = load from index 3 +; AVX512BW: ir<%v4> = load from index 4 +; AVX512BW: ir<%v5> = load from index 5 +; AVX512BW: ir<%v6> = load from index 6 +; AVX512BW: ir<%v7> = load from index 7 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll index 85ab5b85c9958..d73dc5e0134d6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v1, ptr %out1" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 -; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4 -; SSE2: LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v1, ptr %out1, align 4 -; SSE2: LV: Found an estimated cost of 28 for VF 8 For instruction: store float %v1, ptr %out1, align 4 -; SSE2: LV: Found an estimated cost of 56 for VF 16 For instruction: store float %v1, ptr %out1, align 4 +; SSE2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 8 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 60 for VF 16 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 120 for VF 32 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 30 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 60 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 120 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: store float %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: store float %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 12 for VF 16 For instruction: store float %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 24 for VF 32 For instruction: store float %v1, ptr %out1, align 4 +; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store float %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: store float %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: store float %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 10 for VF 32 For instruction: store float %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 20 for VF 64 For instruction: store float %v1, ptr %out1, align 4 +; AVX512: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 10 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 20 for VF 64: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll index 77abfc65a3e59..d372ab153784a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v2, ptr %out2" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,40 +13,92 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 2 For instruction: store float %v2, ptr %out2, align 4 -; SSE2: LV: Found an estimated cost of 24 for VF 4 For instruction: store float %v2, ptr %out2, align 4 -; SSE2: LV: Found an estimated cost of 48 for VF 8 For instruction: store float %v2, ptr %out2, align 4 -; SSE2: LV: Found an estimated cost of 96 for VF 16 For instruction: store float %v2, ptr %out2, align 4 +; SSE2: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 45 for VF 8 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 90 for VF 16 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 180 for VF 32 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 22 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 45 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 90 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 180 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 2 For instruction: store float %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: store float %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: store float %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: store float %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: store float %v2, ptr %out2, align 4 +; AVX2: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 28 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store float %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: store float %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 8 for VF 8 For instruction: store float %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 12 for VF 16 For instruction: store float %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 24 for VF 32 For instruction: store float %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 48 for VF 64 For instruction: store float %v2, ptr %out2, align 4 +; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 48 for VF 64: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll index 699c2eb63cb31..f2f7107cc1cc7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v3, ptr %out3" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,40 +13,112 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 -; SSE2: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v3, ptr %out3, align 4 -; SSE2: LV: Found an estimated cost of 28 for VF 4 For instruction: store float %v3, ptr %out3, align 4 -; SSE2: LV: Found an estimated cost of 56 for VF 8 For instruction: store float %v3, ptr %out3, align 4 -; SSE2: LV: Found an estimated cost of 112 for VF 16 For instruction: store float %v3, ptr %out3, align 4 +; SSE2: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 60 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 120 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 240 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store float %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store float %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: store float %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: store float %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 80 for VF 32 For instruction: store float %v3, ptr %out3, align 4 +; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 80 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store float %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 4 For instruction: store float %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 11 for VF 8 For instruction: store float %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: store float %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 44 for VF 32 For instruction: store float %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 88 for VF 64 For instruction: store float %v3, ptr %out3, align 4 +; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 88 for VF 64: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll index 86758b5a24fe9..5e29f47acaf35 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v4, ptr %out4" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,37 +13,132 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 -; SSE2: LV: Found an estimated cost of 20 for VF 2 For instruction: store float %v4, ptr %out4, align 4 -; SSE2: LV: Found an estimated cost of 44 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; SSE2: LV: Found an estimated cost of 88 for VF 8 For instruction: store float %v4, ptr %out4, align 4 +; SSE2: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 75 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 150 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 300 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 75 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 150 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 300 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 7 for VF 2 For instruction: store float %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 21 for VF 8 For instruction: store float %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 35 for VF 16 For instruction: store float %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 70 for VF 32 For instruction: store float %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 140 for VF 64 For instruction: store float %v4, ptr %out4, align 4 +; AVX512: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 70 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 140 for VF 64: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll index 53c8f59491e76..c003c1314575e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v5, ptr %out5" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,37 +13,152 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 -; SSE2: LV: Found an estimated cost of 21 for VF 2 For instruction: store float %v5, ptr %out5, align 4 -; SSE2: LV: Found an estimated cost of 48 for VF 4 For instruction: store float %v5, ptr %out5, align 4 -; SSE2: LV: Found an estimated cost of 96 for VF 8 For instruction: store float %v5, ptr %out5, align 4 +; SSE2: Cost of 21 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 48 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 96 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 19 for VF 2 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 90 for VF 8 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: Cost of 19 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 42 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 90 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 180 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 360 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 -; AVX2: LV: Found an estimated cost of 11 for VF 2 For instruction: store float %v5, ptr %out5, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 4 For instruction: store float %v5, ptr %out5, align 4 -; AVX2: LV: Found an estimated cost of 39 for VF 8 For instruction: store float %v5, ptr %out5, align 4 -; AVX2: LV: Found an estimated cost of 78 for VF 16 For instruction: store float %v5, ptr %out5, align 4 +; AVX2: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 15 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 39 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 78 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 360 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 8 for VF 2 For instruction: store float %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 17 for VF 4 For instruction: store float %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 25 for VF 8 For instruction: store float %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: store float %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 102 for VF 32 For instruction: store float %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 204 for VF 64 For instruction: store float %v5, ptr %out5, align 4 +; AVX512: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 25 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 102 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 204 for VF 64: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll index 244bceb69f97d..d48a3409f9b3e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v6, ptr %out6" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,36 +13,172 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 -; SSE2: LV: Found an estimated cost of 23 for VF 2 For instruction: store float %v6, ptr %out6, align 4 -; SSE2: LV: Found an estimated cost of 52 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; SSE2: LV: Found an estimated cost of 104 for VF 8 For instruction: store float %v6, ptr %out6, align 4 +; SSE2: Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 104 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 208 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 50 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 105 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 210 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 420 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 50 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 105 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 210 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 420 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: store float %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 20 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 40 for VF 8 For instruction: store float %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: store float %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 140 for VF 32 For instruction: store float %v6, ptr %out6, align 4 +; AVX512: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll index c1fe64e324378..117966d4dbc39 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v7, ptr %out7" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,36 +13,192 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 -; SSE2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4 -; SSE2: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; SSE2: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v7, ptr %out7, align 4 +; SSE2: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 112 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 224 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 240 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 480 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 240 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 480 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 11 for VF 2 For instruction: store float %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 23 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 46 for VF 8 For instruction: store float %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 92 for VF 16 For instruction: store float %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 184 for VF 32 For instruction: store float %v7, ptr %out7, align 4 +; AVX512: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 23 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 46 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 184 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 368 for VF 64: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll index dd6094e4a7d5c..c0e32fb5dee91 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v1, ptr %out1" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 6 for VF 2 For instruction: store double %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 12 for VF 4 For instruction: store double %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 24 for VF 8 For instruction: store double %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 48 for VF 16 For instruction: store double %v1, ptr %out1, align 8 +; SSE2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 48 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 6 for VF 2 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 4 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 28 for VF 8 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 56 for VF 16 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 112 for VF 32 For instruction: store double %v1, ptr %out1, align 8 +; AVX1: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 112 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: store double %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: store double %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: store double %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: store double %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction: store double %v1, ptr %out1, align 8 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 16 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 32 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 64 For instruction: store double %v1, ptr %out1, align 8 +; AVX512: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 40 for VF 64: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll index 3f4216bb3a1ef..2c24bd2095c89 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v2, ptr %out2" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,37 +13,92 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 2 For instruction: store double %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 20 for VF 4 For instruction: store double %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 40 for VF 8 For instruction: store double %v2, ptr %out2, align 8 +; SSE2: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 80 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store double %v2, ptr %out2, align 8 +; AVX1: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 192 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store double %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: store double %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: store double %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: store double %v2, ptr %out2, align 8 +; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 36 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 192 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 8 for VF 4 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 24 for VF 16 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 48 for VF 32 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 96 for VF 64 For instruction: store double %v2, ptr %out2, align 8 +; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 96 for VF 64: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll index 44f9a14424cda..8b7ec565f1dde 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v3, ptr %out3" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,36 +13,112 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 12 for VF 2 For instruction: store double %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 24 for VF 4 For instruction: store double %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 48 for VF 8 For instruction: store double %v3, ptr %out3, align 8 +; SSE2: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 56 for VF 8 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 112 for VF 16 For instruction: store double %v3, ptr %out3, align 8 +; AVX1: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 224 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: store double %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: store double %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store double %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store double %v3, ptr %out3, align 8 +; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 224 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 22 for VF 8 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 44 for VF 16 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 88 for VF 32 For instruction: store double %v3, ptr %out3, align 8 +; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 44 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 88 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 176 for VF 64: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll index 5946c4d7df295..14216fca2fc90 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v4, ptr %out4" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,33 +13,132 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 18 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 36 for VF 4 For instruction: store double %v4, ptr %out4, align 8 +; SSE2: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 72 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 144 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store double %v4, ptr %out4, align 8 +; AVX1: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 88 for VF 8 For instruction: store double %v4, ptr %out4, align 8 +; AVX2: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 14 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 140 for VF 32 For instruction: store double %v4, ptr %out4, align 8 +; AVX512: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll index e9c97283fab18..f15e3ffc88103 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v5, ptr %out5" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,33 +13,152 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 40 for VF 4 For instruction: store double %v5, ptr %out5, align 8 +; SSE2: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 80 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 160 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store double %v5, ptr %out5, align 8 +; AVX1: Cost of 21 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 48 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 96 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 384 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 11 for VF 2 For instruction: store double %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 21 for VF 4 For instruction: store double %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 42 for VF 8 For instruction: store double %v5, ptr %out5, align 8 +; AVX2: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 42 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 384 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 17 for VF 2 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 25 for VF 4 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 51 for VF 8 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 102 for VF 16 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 204 for VF 32 For instruction: store double %v5, ptr %out5, align 8 +; AVX512: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 25 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 102 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 204 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 408 for VF 64: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll index 555bbe8e44269..763c95910162a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v6, ptr %out6" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,33 +13,172 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 22 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v6, ptr %out6, align 8 +; SSE2: Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v0> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 8 For instruction: store double %v6, ptr %out6, align 8 +; AVX1: Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 104 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 208 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 416 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v0> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 52 for VF 4 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 104 for VF 8 For instruction: store double %v6, ptr %out6, align 8 +; AVX2: Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 104 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 208 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 416 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v0> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 4 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 70 for VF 8 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 140 for VF 16 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: store double %v6, ptr %out6, align 8 +; AVX512: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 140 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 280 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 560 for VF 64: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v0> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll index a11d86ce14ef7..90c4333ee7597 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v., ptr %out." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store double %v\., ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (WIDEN store|REPLICATE store ir<%v\.>)" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,164 +13,41 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 48 for VF 4 For instruction: store double %v7, ptr %out7, align 8 -; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: store double %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v0, ptr %out0, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store double %v7, ptr %out7, align 8 -; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: store double %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v0, ptr %out0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: store double %v7, ptr %out7, align 8 -; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 46 for VF 4 For instruction: store double %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v7, ptr %out7, align 8 +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out0>, ir<%v0> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out1>, ir<%v1> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out2>, ir<%v2> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out3>, ir<%v3> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out4>, ir<%v4> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out5>, ir<%v5> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out6>, ir<%v6> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out7>, ir<%v7> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out0>, ir<%v0> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out1>, ir<%v1> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out2>, ir<%v2> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out3>, ir<%v3> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out4>, ir<%v4> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out5>, ir<%v5> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out6>, ir<%v6> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out7>, ir<%v7> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out0>, ir<%v0> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out1>, ir<%v1> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out2>, ir<%v2> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out3>, ir<%v3> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out4>, ir<%v4> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out5>, ir<%v5> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out6>, ir<%v6> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out7>, ir<%v7> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out0>, ir<%v0> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out1>, ir<%v1> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out2>, ir<%v2> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out3>, ir<%v3> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out4>, ir<%v4> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out5>, ir<%v5> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out6>, ir<%v6> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out7>, ir<%v7> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll index 5a48776cdad2f..ff1e2e9d786e1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v1, ptr %out1" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,92 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 -; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2 -; SSE2: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2 -; SSE2: LV: Found an estimated cost of 34 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 -; SSE2: LV: Found an estimated cost of 68 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 +; SSE2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 34 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 68 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 34 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 70 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 140 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX1: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 34 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX2: LV: Found an estimated cost of 6 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX2: LV: Found an estimated cost of 12 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 12 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512DQ: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512DQ: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512DQ: LV: Found an estimated cost of 4 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512DQ: LV: Found an estimated cost of 10 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512DQ: LV: Found an estimated cost of 284 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX512DQ: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: Cost of 10 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: Cost of 284 for VF 64: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512BW: LV: Found an estimated cost of 3 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512BW: LV: Found an estimated cost of 3 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512BW: LV: Found an estimated cost of 3 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512BW: LV: Found an estimated cost of 3 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512BW: LV: Found an estimated cost of 7 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512BW: LV: Found an estimated cost of 14 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX512BW: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: Cost of 7 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: Cost of 14 for VF 64: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll index 2cfb488d5007f..9d7bab77ae8f2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v2, ptr %out2" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,118 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 -; SSE2: LV: Found an estimated cost of 16 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2 -; SSE2: LV: Found an estimated cost of 26 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2 -; SSE2: LV: Found an estimated cost of 51 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 -; SSE2: LV: Found an estimated cost of 102 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 +; SSE2: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 26 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 102 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 29 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 105 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 210 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: Cost of 15 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 29 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 52 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 105 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 210 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX2: LV: Found an estimated cost of 7 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX2: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 30 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512DQ: LV: Found an estimated cost of 7 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512DQ: LV: Found an estimated cost of 9 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512DQ: LV: Found an estimated cost of 15 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512DQ: LV: Found an estimated cost of 57 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512DQ: LV: Found an estimated cost of 426 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX512DQ: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: Cost of 15 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: Cost of 29 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: Cost of 57 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: Cost of 426 for VF 64: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512BW: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512BW: LV: Found an estimated cost of 6 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512BW: LV: Found an estimated cost of 6 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512BW: LV: Found an estimated cost of 12 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512BW: LV: Found an estimated cost of 18 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512BW: LV: Found an estimated cost of 36 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX512BW: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: Cost of 18 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: Cost of 36 for VF 64: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll index 2e4594fac2361..323f0ea8b3007 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v3, ptr %out3" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,144 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 -; SSE2: LV: Found an estimated cost of 17 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2 -; SSE2: LV: Found an estimated cost of 34 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2 -; SSE2: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 -; SSE2: LV: Found an estimated cost of 136 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 +; SSE2: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 34 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 136 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 34 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 140 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 280 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 34 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 140 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 280 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 36 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 72 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512DQ: LV: Found an estimated cost of 3 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512DQ: LV: Found an estimated cost of 7 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512DQ: LV: Found an estimated cost of 11 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512DQ: LV: Found an estimated cost of 34 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512DQ: LV: Found an estimated cost of 68 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512DQ: LV: Found an estimated cost of 568 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX512DQ: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: Cost of 34 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: Cost of 68 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: Cost of 568 for VF 64: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512BW: LV: Found an estimated cost of 8 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512BW: LV: Found an estimated cost of 8 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512BW: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512BW: LV: Found an estimated cost of 17 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512BW: LV: Found an estimated cost of 34 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512BW: LV: Found an estimated cost of 68 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX512BW: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: Cost of 34 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: Cost of 68 for VF 64: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll index f536f4438649d..1ca528a9ab95f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v4, ptr %out4" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,170 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; SSE2: LV: Found an estimated cost of 22 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 -; SSE2: LV: Found an estimated cost of 43 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 -; SSE2: LV: Found an estimated cost of 85 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; SSE2: LV: Found an estimated cost of 170 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; SSE2: Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 43 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 85 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 170 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 86 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 175 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 350 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 86 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 175 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 350 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 47 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 176 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 355 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 710 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: Cost of 47 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: Cost of 86 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: Cost of 355 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: Cost of 710 for VF 64: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512BW: LV: Found an estimated cost of 11 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512BW: LV: Found an estimated cost of 11 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512BW: LV: Found an estimated cost of 22 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512BW: LV: Found an estimated cost of 33 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512BW: LV: Found an estimated cost of 55 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512BW: LV: Found an estimated cost of 110 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512BW: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: Cost of 33 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: Cost of 55 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: Cost of 110 for VF 64: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll index 1b7522d01ae2e..04feb5c3e0252 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v5, ptr %out5" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,196 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 -; SSE2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2 -; SSE2: LV: Found an estimated cost of 51 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2 -; SSE2: LV: Found an estimated cost of 102 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 -; SSE2: LV: Found an estimated cost of 204 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 +; SSE2: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 51 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 102 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 204 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 29 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 102 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 420 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: Cost of 29 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 102 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 210 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 420 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX2: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX2: LV: Found an estimated cost of 17 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX2: LV: Found an estimated cost of 24 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX2: LV: Found an estimated cost of 64 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX2: LV: Found an estimated cost of 102 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX2: Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 102 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512DQ: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512DQ: LV: Found an estimated cost of 18 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512DQ: LV: Found an estimated cost of 23 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512DQ: LV: Found an estimated cost of 61 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512DQ: LV: Found an estimated cost of 96 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512DQ: LV: Found an estimated cost of 852 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX512DQ: Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: Cost of 23 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: Cost of 61 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: Cost of 852 for VF 64: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512BW: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512BW: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512BW: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512BW: LV: Found an estimated cost of 40 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512BW: LV: Found an estimated cost of 81 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512BW: LV: Found an estimated cost of 162 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX512BW: Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: Cost of 13 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: Cost of 27 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: Cost of 81 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: Cost of 162 for VF 64: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll index f6ed77887c802..e6dd98c0a5326 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v6, ptr %out6" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,222 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; SSE2: LV: Found an estimated cost of 33 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 -; SSE2: LV: Found an estimated cost of 60 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 -; SSE2: LV: Found an estimated cost of 119 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; SSE2: LV: Found an estimated cost of 238 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; SSE2: Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 60 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 119 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 238 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 245 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 490 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 245 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 490 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 122 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 246 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 497 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 994 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: Cost of 65 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: Cost of 122 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: Cost of 246 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: Cost of 497 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: Cost of 994 for VF 64: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512BW: LV: Found an estimated cost of 16 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512BW: LV: Found an estimated cost of 16 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512BW: LV: Found an estimated cost of 32 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512BW: LV: Found an estimated cost of 64 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512BW: LV: Found an estimated cost of 112 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512BW: LV: Found an estimated cost of 224 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512BW: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: Cost of 16 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: Cost of 32 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: Cost of 112 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: Cost of 224 for VF 64: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll index cc82d48fadb2c..e1e267b2595c8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v7, ptr %out7" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,248 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; SSE2: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 -; SSE2: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 -; SSE2: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; SSE2: LV: Found an estimated cost of 272 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; SSE2: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 272 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 560 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 560 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 568 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 1136 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 +; AVX512DQ: Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 +; AVX512DQ: Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 +; AVX512DQ: Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 +; AVX512DQ: Cost of 568 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 +; AVX512DQ: Cost of 1136 for VF 64: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512BW: LV: Found an estimated cost of 18 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512BW: LV: Found an estimated cost of 18 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512BW: LV: Found an estimated cost of 37 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512BW: LV: Found an estimated cost of 74 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512BW: LV: Found an estimated cost of 148 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512BW: LV: Found an estimated cost of 296 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512BW: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 +; AVX512BW: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 +; AVX512BW: Cost of 37 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 +; AVX512BW: Cost of 74 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 +; AVX512BW: Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 +; AVX512BW: Cost of 296 for VF 64: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll index 0a33a96bb6ba1..95523a3dce3fc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v1, ptr %out1" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 -; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4 -; SSE2: LV: Found an estimated cost of 30 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4 -; SSE2: LV: Found an estimated cost of 60 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 -; SSE2: LV: Found an estimated cost of 120 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 +; SSE2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 30 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 60 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 120 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 18 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 38 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 76 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 152 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 12 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX2: LV: Found an estimated cost of 24 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 10 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX512: LV: Found an estimated cost of 20 for VF 64 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX512: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 10 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 20 for VF 64: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll index ad8eca60b43bd..07fe59c0f36c3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v2, ptr %out2" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,40 +13,92 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 -; SSE2: LV: Found an estimated cost of 23 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4 -; SSE2: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4 -; SSE2: LV: Found an estimated cost of 96 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 -; SSE2: LV: Found an estimated cost of 192 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 +; SSE2: Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 48 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 96 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 57 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 114 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 228 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX2: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 28 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 12 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 24 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX512: LV: Found an estimated cost of 48 for VF 64 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 48 for VF 64: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll index 6c86f56a2da3c..0949b11043e22 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v3, ptr %out3" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,40 +13,112 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 -; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4 -; SSE2: LV: Found an estimated cost of 60 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4 -; SSE2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 -; SSE2: LV: Found an estimated cost of 240 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 +; SSE2: Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 60 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 240 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 76 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 152 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 304 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX2: LV: Found an estimated cost of 80 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 80 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 11 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 44 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX512: LV: Found an estimated cost of 88 for VF 64 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 88 for VF 64: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll index f4fbbec3a46f5..f2a50db468e5b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v4, ptr %out4" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,37 +13,132 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 -; SSE2: LV: Found an estimated cost of 40 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 -; SSE2: LV: Found an estimated cost of 84 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; SSE2: LV: Found an estimated cost of 168 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 +; SSE2: Cost of 40 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 84 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 168 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 336 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 95 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 190 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 380 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 95 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 190 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 380 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 7 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 21 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 35 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 70 for VF 32 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX512: LV: Found an estimated cost of 140 for VF 64 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX512: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 70 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 140 for VF 64: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll index 4f35f667276d8..36b06e06a26b6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v5, ptr %out5" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,37 +13,152 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 -; SSE2: LV: Found an estimated cost of 45 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4 -; SSE2: LV: Found an estimated cost of 96 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4 -; SSE2: LV: Found an estimated cost of 192 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 +; SSE2: Cost of 45 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 96 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 192 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 384 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 54 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 114 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 228 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 456 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX2: LV: Found an estimated cost of 11 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX2: LV: Found an estimated cost of 15 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX2: LV: Found an estimated cost of 39 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX2: LV: Found an estimated cost of 78 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX2: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 15 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 39 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 78 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 456 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 8 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 17 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 25 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 102 for VF 32 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX512: LV: Found an estimated cost of 204 for VF 64 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX512: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 25 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 102 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 204 for VF 64: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll index 9a4f580a7a2ca..49aa508224965 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v6, ptr %out6" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,36 +13,172 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 -; SSE2: LV: Found an estimated cost of 51 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 -; SSE2: LV: Found an estimated cost of 108 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; SSE2: LV: Found an estimated cost of 216 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 +; SSE2: Cost of 51 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 108 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 216 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 432 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 64 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 133 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 266 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 532 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 64 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 133 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 266 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 532 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 20 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 40 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX512: LV: Found an estimated cost of 140 for VF 32 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX512: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll index 2c4ca9993f53d..55cad6f0afd75 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v7, ptr %out7" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,36 +13,192 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 -; SSE2: LV: Found an estimated cost of 56 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 -; SSE2: LV: Found an estimated cost of 120 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; SSE2: LV: Found an estimated cost of 240 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 +; SSE2: Cost of 56 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 120 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 240 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 480 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 72 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 152 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 304 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 608 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 72 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 152 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 304 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 608 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 11 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 23 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 46 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 92 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX512: LV: Found an estimated cost of 184 for VF 32 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX512: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 23 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 46 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 184 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 368 for VF 64: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll index 6fbc678408f6b..9610349875d56 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v1, ptr %out1" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 28 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 56 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 112 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 +; SSE2: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 22 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 44 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 88 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 176 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 64 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX512: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: Cost of 40 for VF 64: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll index fe1dad3c3effc..2b1d991b2a9ec 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v2, ptr %out2" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,37 +13,92 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 22 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 44 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 88 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 +; SSE2: Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 33 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 66 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 264 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 36 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: Cost of 264 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 24 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 48 for VF 32 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 96 for VF 64 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: Cost of 96 for VF 64: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll index 4ebb2283f9b7f..953f7a94396b4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v3, ptr %out3" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,36 +13,112 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 56 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 112 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 +; SSE2: Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 112 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 224 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 176 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 22 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 44 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 88 for VF 32 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 44 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 88 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: Cost of 176 for VF 64: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll index 79c64ecfe457d..8fc4d18cc706f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v4, ptr %out4" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,33 +13,132 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 38 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 76 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 +; SSE2: Cost of 38 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 76 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 152 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 304 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX1: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 55 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 110 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 220 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 440 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX2: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 55 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 110 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 220 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 440 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 14 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 140 for VF 32 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX512: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll index 05909fa7a1fb9..4647cbab6b60d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v5, ptr %out5" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,33 +13,152 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 +; SSE2: Cost of 44 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 88 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 176 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 352 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX1: Cost of 30 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 528 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 11 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 21 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 42 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX2: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 42 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: Cost of 528 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 17 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 25 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 51 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 102 for VF 16 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 204 for VF 32 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX512: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 25 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 102 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 204 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: Cost of 408 for VF 64: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll index 881c7867614b7..b0a8727383234 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v6, ptr %out6" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,33 +13,172 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 50 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 100 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 +; SSE2: Cost of 50 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 100 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 200 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 400 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX1: Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 77 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 154 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 308 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 616 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX2: Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 77 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 154 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 308 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 616 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 70 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 140 for VF 16 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX512: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 140 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 280 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: Cost of 560 for VF 64: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll index 572f90b41debc..4c6710e763c9e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v., ptr %out." +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i64 %v\., ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at ,|WIDEN store|REPLICATE store ir<%v\.>)" --filter "^ store ir<%v.?> to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -13,164 +13,188 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 56 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 -; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; SSE2: LV: Found an estimated cost of 112 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 +; SSE2: Cost of 56 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 112 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 224 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 448 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX1: Cost of 40 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 88 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 176 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 352 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 704 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX2: Cost of 40 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 88 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 176 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 352 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 704 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 ; ; AVX512-LABEL: 'test' -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 23 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 46 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX512: Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512: store ir<%v> to index 0 +; AVX512: store ir<%v1> to index 1 +; AVX512: store ir<%v2> to index 2 +; AVX512: store ir<%v3> to index 3 +; AVX512: store ir<%v4> to index 4 +; AVX512: store ir<%v5> to index 5 +; AVX512: store ir<%v6> to index 6 +; AVX512: store ir<%v7> to index 7 +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out0>, ir<%v> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out1>, ir<%v1> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out2>, ir<%v2> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out3>, ir<%v3> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out4>, ir<%v4> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out5>, ir<%v5> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out6>, ir<%v6> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out7>, ir<%v7> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out0>, ir<%v> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out1>, ir<%v1> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out2>, ir<%v2> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out3>, ir<%v3> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out4>, ir<%v4> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out5>, ir<%v5> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out6>, ir<%v6> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out7>, ir<%v7> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out0>, ir<%v> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out1>, ir<%v1> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out2>, ir<%v2> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out3>, ir<%v3> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out4>, ir<%v4> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out5>, ir<%v5> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out6>, ir<%v6> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out7>, ir<%v7> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out0>, ir<%v> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out1>, ir<%v1> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out2>, ir<%v2> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out3>, ir<%v3> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out4>, ir<%v4> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out5>, ir<%v5> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out6>, ir<%v6> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out7>, ir<%v7> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll index 87b5f4d3858dc..91c82cc8fd5b7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v1, ptr %out1" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,106 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 -; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1 -; SSE2: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1 -; SSE2: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1 -; SSE2: LV: Found an estimated cost of 126 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 +; SSE2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: Cost of 126 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX1: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX1: LV: Found an estimated cost of 66 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX1: LV: Found an estimated cost of 134 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX1: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 66 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: Cost of 134 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX2: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX2: LV: Found an estimated cost of 6 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 6 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512DQ: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512DQ: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512DQ: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512DQ: LV: Found an estimated cost of 4 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512DQ: LV: Found an estimated cost of 5 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512DQ: LV: Found an estimated cost of 270 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX512DQ: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 5 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 270 for VF 64: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512BW: LV: Found an estimated cost of 4 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512BW: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512BW: LV: Found an estimated cost of 4 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512BW: LV: Found an estimated cost of 8 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512BW: LV: Found an estimated cost of 20 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512BW: LV: Found an estimated cost of 41 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX512BW: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 41 for VF 64: INTERLEAVE-GROUP with factor 2 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll index 9a7503b63f9ff..779134134fe39 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v2, ptr %out2" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,132 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 -; SSE2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1 -; SSE2: LV: Found an estimated cost of 52 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1 -; SSE2: LV: Found an estimated cost of 101 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 -; SSE2: LV: Found an estimated cost of 204 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 +; SSE2: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 101 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: Cost of 204 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 53 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 100 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 201 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX1: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 27 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 53 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 100 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: Cost of 201 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX2: LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX2: LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512DQ: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512DQ: LV: Found an estimated cost of 7 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512DQ: LV: Found an estimated cost of 14 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512DQ: LV: Found an estimated cost of 15 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512DQ: LV: Found an estimated cost of 405 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX512DQ: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 14 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 15 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 405 for VF 64: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512BW: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512BW: LV: Found an estimated cost of 8 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512BW: LV: Found an estimated cost of 16 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512BW: LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512BW: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512BW: LV: Found an estimated cost of 29 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX512BW: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 29 for VF 64: INTERLEAVE-GROUP with factor 3 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll index e072d707e23e5..38b86e812f2dc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v3, ptr %out3" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,158 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 -; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1 -; SSE2: LV: Found an estimated cost of 60 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1 -; SSE2: LV: Found an estimated cost of 124 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 -; SSE2: LV: Found an estimated cost of 252 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 +; SSE2: Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 60 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 124 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: Cost of 252 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 268 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX1: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 33 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 66 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: Cost of 268 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX2: LV: Found an estimated cost of 5 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX2: LV: Found an estimated cost of 10 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX2: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512DQ: LV: Found an estimated cost of 5 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512DQ: LV: Found an estimated cost of 5 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512DQ: LV: Found an estimated cost of 5 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512DQ: LV: Found an estimated cost of 9 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512DQ: LV: Found an estimated cost of 14 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512DQ: LV: Found an estimated cost of 540 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX512DQ: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 14 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 540 for VF 64: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512BW: LV: Found an estimated cost of 11 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512BW: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512BW: LV: Found an estimated cost of 11 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512BW: LV: Found an estimated cost of 12 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512BW: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512BW: LV: Found an estimated cost of 28 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX512BW: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 28 for VF 64: INTERLEAVE-GROUP with factor 4 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll index 1c079204cc3bb..d2245df5aa9b0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v4, ptr %out4" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,180 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 -; SSE2: LV: Found an estimated cost of 44 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1 -; SSE2: LV: Found an estimated cost of 87 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 -; SSE2: LV: Found an estimated cost of 178 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 -; SSE2: LV: Found an estimated cost of 360 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 +; SSE2: Cost of 44 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 87 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 178 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: Cost of 360 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX1: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 166 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: Cost of 335 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX2: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 166 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: Cost of 335 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 87 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 336 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 675 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 87 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 166 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 336 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 675 for VF 64: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512BW: LV: Found an estimated cost of 15 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512BW: LV: Found an estimated cost of 31 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512BW: LV: Found an estimated cost of 79 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512BW: LV: Found an estimated cost of 158 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512BW: LV: Found an estimated cost of 237 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512BW: LV: Found an estimated cost of 395 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512BW: Cost of 15 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 31 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 79 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 158 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 237 for VF 32: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 395 for VF 64: INTERLEAVE-GROUP with factor 5 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll index 0b260d46b9173..8c603581aa08e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v5, ptr %out5" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,210 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 -; SSE2: LV: Found an estimated cost of 49 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1 -; SSE2: LV: Found an estimated cost of 98 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1 -; SSE2: LV: Found an estimated cost of 201 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 -; SSE2: LV: Found an estimated cost of 408 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 +; SSE2: Cost of 49 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 98 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 201 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: Cost of 408 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 198 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 402 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX1: Cost of 27 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 53 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 100 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 198 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: Cost of 402 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX2: LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX2: LV: Found an estimated cost of 96 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX2: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 30 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: ir<%v> = load from index 0 +; AVX2: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512DQ: LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512DQ: LV: Found an estimated cost of 12 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512DQ: LV: Found an estimated cost of 19 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512DQ: LV: Found an estimated cost of 93 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512DQ: LV: Found an estimated cost of 810 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX512DQ: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 19 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 29 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 93 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 810 for VF 64: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512BW: LV: Found an estimated cost of 18 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512BW: LV: Found an estimated cost of 38 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512BW: LV: Found an estimated cost of 98 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512BW: LV: Found an estimated cost of 197 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512BW: LV: Found an estimated cost of 295 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512BW: LV: Found an estimated cost of 591 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX512BW: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 38 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 98 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 197 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 295 for VF 32: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 591 for VF 64: INTERLEAVE-GROUP with factor 6 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll index b69559c6dae62..d10cab37a697d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v6, ptr %out6" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,231 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 -; SSE2: LV: Found an estimated cost of 57 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1 -; SSE2: LV: Found an estimated cost of 112 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 -; SSE2: LV: Found an estimated cost of 225 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 -; SSE2: LV: Found an estimated cost of 456 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 +; SSE2: Cost of 57 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 112 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 225 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: Cost of 456 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX1: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 119 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 232 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: Cost of 469 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX2: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 119 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 232 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: Cost of 469 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 234 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 470 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 945 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 121 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 234 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 470 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 945 for VF 64: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512BW: LV: Found an estimated cost of 22 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512BW: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512BW: LV: Found an estimated cost of 118 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512BW: LV: Found an estimated cost of 236 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512BW: LV: Found an estimated cost of 472 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512BW: LV: Found an estimated cost of 826 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512BW: Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 118 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 236 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 472 for VF 32: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 826 for VF 64: INTERLEAVE-GROUP with factor 7 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll index 078528b58f6f7..2f8d2ff1cc8d2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v7, ptr %out7" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 @@ -14,50 +14,258 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 -; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 -; SSE2: LV: Found an estimated cost of 56 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1 -; SSE2: LV: Found an estimated cost of 120 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 -; SSE2: LV: Found an estimated cost of 248 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 -; SSE2: LV: Found an estimated cost of 504 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 +; SSE2: Cost of 56 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 120 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 248 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 +; SSE2: Cost of 504 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; SSE2: store ir<%v> to index 0 +; SSE2: store ir<%v1> to index 1 +; SSE2: store ir<%v2> to index 2 +; SSE2: store ir<%v3> to index 3 +; SSE2: store ir<%v4> to index 4 +; SSE2: store ir<%v5> to index 5 +; SSE2: store ir<%v6> to index 6 +; SSE2: store ir<%v7> to index 7 ; ; AVX1-LABEL: 'test' -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX1: Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 +; AVX1: Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX1: store ir<%v> to index 0 +; AVX1: store ir<%v1> to index 1 +; AVX1: store ir<%v2> to index 2 +; AVX1: store ir<%v3> to index 3 +; AVX1: store ir<%v4> to index 4 +; AVX1: store ir<%v5> to index 5 +; AVX1: store ir<%v6> to index 6 +; AVX1: store ir<%v7> to index 7 ; ; AVX2-LABEL: 'test' -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX2: Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 +; AVX2: Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX2: store ir<%v> to index 0 +; AVX2: store ir<%v1> to index 1 +; AVX2: store ir<%v2> to index 2 +; AVX2: store ir<%v3> to index 3 +; AVX2: store ir<%v4> to index 4 +; AVX2: store ir<%v5> to index 5 +; AVX2: store ir<%v6> to index 6 +; AVX2: store ir<%v7> to index 7 ; ; AVX512DQ-LABEL: 'test' -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 1080 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 +; AVX512DQ: ir<%v> = load from index 0 +; AVX512DQ: Cost of 1080 for VF 64: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512DQ: store ir<%v> to index 0 +; AVX512DQ: store ir<%v1> to index 1 +; AVX512DQ: store ir<%v2> to index 2 +; AVX512DQ: store ir<%v3> to index 3 +; AVX512DQ: store ir<%v4> to index 4 +; AVX512DQ: store ir<%v5> to index 5 +; AVX512DQ: store ir<%v6> to index 6 +; AVX512DQ: store ir<%v7> to index 7 ; ; AVX512BW-LABEL: 'test' -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512BW: LV: Found an estimated cost of 25 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512BW: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512BW: LV: Found an estimated cost of 137 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512BW: LV: Found an estimated cost of 275 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512BW: LV: Found an estimated cost of 550 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512BW: LV: Found an estimated cost of 1100 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512BW: Cost of 25 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 53 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 137 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 275 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 550 for VF 32: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 +; AVX512BW: ir<%v> = load from index 0 +; AVX512BW: Cost of 1100 for VF 64: INTERLEAVE-GROUP with factor 8 at , ir<%out0> +; AVX512BW: store ir<%v> to index 0 +; AVX512BW: store ir<%v1> to index 1 +; AVX512BW: store ir<%v2> to index 2 +; AVX512BW: store ir<%v3> to index 3 +; AVX512BW: store ir<%v4> to index 4 +; AVX512BW: store ir<%v5> to index 5 +; AVX512BW: store ir<%v6> to index 6 +; AVX512BW: store ir<%v7> to index 7 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll index 665b0c3bac040..3f3ca1b215dde 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%i[2,4] = load i16, ptr %[a-zA-Z0-7]+, align 2" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%i[2,4] = load i16, ptr %[a-zA-Z0-7]+, align 2" --filter "Cost of [0-9]+ for VF [0-9]+: (REPLICATE ir<%i[24]> = load|INTERLEAVE-GROUP with factor [0-9]+ at %i[24])" --filter "^ ir<.* = load from index" ; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED ; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED ; REQUIRES: asserts @@ -22,26 +22,30 @@ define void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-LABEL: 'test1' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE ir<%i2> = load ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE ir<%i4> = load ir<%arrayidx7> +; DISABLED_MASKED_STRIDED: Cost of 13 for VF 4: REPLICATE ir<%i2> = load ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 13 for VF 4: REPLICATE ir<%i4> = load ir<%arrayidx7> +; DISABLED_MASKED_STRIDED: Cost of 27 for VF 8: REPLICATE ir<%i2> = load ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 27 for VF 8: REPLICATE ir<%i4> = load ir<%arrayidx7> +; DISABLED_MASKED_STRIDED: Cost of 55 for VF 16: REPLICATE ir<%i2> = load ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 55 for VF 16: REPLICATE ir<%i4> = load ir<%arrayidx7> ; ; ENABLED_MASKED_STRIDED-LABEL: 'test1' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2> +; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0 +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1 +; ENABLED_MASKED_STRIDED: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2> +; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0 +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1 +; ENABLED_MASKED_STRIDED: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2> +; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0 +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1 +; ENABLED_MASKED_STRIDED: Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2> +; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0 +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1 ; entry: br label %for.body @@ -79,26 +83,30 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no ; DISABLED_MASKED_STRIDED-LABEL: 'test2' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 2: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V) +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 2: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V) +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 4: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V) +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 4: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V) +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 8: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V) +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 8: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V) +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 16: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V) +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 16: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V) ; ; ENABLED_MASKED_STRIDED-LABEL: 'test2' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8:%[0-9]+]]> +; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0 +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1 +; ENABLED_MASKED_STRIDED: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8]]> +; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0 +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1 +; ENABLED_MASKED_STRIDED: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8]]> +; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0 +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1 +; ENABLED_MASKED_STRIDED: Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8]]> +; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0 +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1 ; entry: %cmp15 = icmp sgt i32 %numPoints, 0 @@ -146,26 +154,22 @@ define void @test(ptr noalias nocapture %points, ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-LABEL: 'test' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 2: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V) +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 4: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V) +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 8: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V) +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 16: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V) ; ; ENABLED_MASKED_STRIDED-LABEL: 'test' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 7 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1> +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 0 +; ENABLED_MASKED_STRIDED: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1> +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 0 +; ENABLED_MASKED_STRIDED: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1> +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 0 +; ENABLED_MASKED_STRIDED: Cost of 14 for VF 16: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1> +; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 0 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll index c2c04ce6f5ff5..3623358228bd4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %[0,2], ptr %[a-zA-Z0-7]+, align 2" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i16 %[0,2], ptr %[a-zA-Z0-7]+, align 2" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i16 %[02]|REPLICATE store ir<%[02]>|INTERLEAVE-GROUP with factor [0-9]+ at )" ; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED ; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED ; REQUIRES: asserts @@ -22,30 +22,29 @@ define void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-LABEL: 'test1' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE store ir<%0>, ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE store ir<%2>, ir<%arrayidx7> +; DISABLED_MASKED_STRIDED: Cost of 13 for VF 4: REPLICATE store ir<%0>, ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 13 for VF 4: REPLICATE store ir<%2>, ir<%arrayidx7> +; DISABLED_MASKED_STRIDED: Cost of 27 for VF 8: REPLICATE store ir<%0>, ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 27 for VF 8: REPLICATE store ir<%2>, ir<%arrayidx7> +; DISABLED_MASKED_STRIDED: Cost of 55 for VF 16: REPLICATE store ir<%0>, ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 55 for VF 16: REPLICATE store ir<%2>, ir<%arrayidx7> ; ; ENABLED_MASKED_STRIDED-LABEL: 'test1' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE store ir<%0>, ir<%arrayidx2> +; ENABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE store ir<%2>, ir<%arrayidx7> +; ENABLED_MASKED_STRIDED: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx2> +; ENABLED_MASKED_STRIDED: store ir<%0> to index 0 +; ENABLED_MASKED_STRIDED: store ir<%2> to index 1 +; ENABLED_MASKED_STRIDED: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx2> +; ENABLED_MASKED_STRIDED: store ir<%0> to index 0 +; ENABLED_MASKED_STRIDED: store ir<%2> to index 1 +; ENABLED_MASKED_STRIDED: Cost of 27 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx2> +; ENABLED_MASKED_STRIDED: store ir<%0> to index 0 +; ENABLED_MASKED_STRIDED: store ir<%2> to index 1 ; entry: br label %for.body @@ -83,30 +82,30 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no ; DISABLED_MASKED_STRIDED-LABEL: 'test2' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 35 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 71 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: Cost of 8 for VF 2: REPLICATE store ir<%0>, ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 2: REPLICATE store ir<%2>, ir<%arrayidx7> +; DISABLED_MASKED_STRIDED: Cost of 17 for VF 4: REPLICATE store ir<%0>, ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 4: REPLICATE store ir<%2>, ir<%arrayidx7> +; DISABLED_MASKED_STRIDED: Cost of 35 for VF 8: REPLICATE store ir<%0>, ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 8: REPLICATE store ir<%2>, ir<%arrayidx7> +; DISABLED_MASKED_STRIDED: Cost of 71 for VF 16: REPLICATE store ir<%0>, ir<%arrayidx2> +; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 16: REPLICATE store ir<%2>, ir<%arrayidx7> ; ; ENABLED_MASKED_STRIDED-LABEL: 'test2' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx2>, vp<[[VP8:%[0-9]+]]> +; ENABLED_MASKED_STRIDED: store ir<%0> to index 0 +; ENABLED_MASKED_STRIDED: store ir<%2> to index 1 +; ENABLED_MASKED_STRIDED: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx2>, vp<[[VP8]]> +; ENABLED_MASKED_STRIDED: store ir<%0> to index 0 +; ENABLED_MASKED_STRIDED: store ir<%2> to index 1 +; ENABLED_MASKED_STRIDED: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx2>, vp<[[VP8]]> +; ENABLED_MASKED_STRIDED: store ir<%0> to index 0 +; ENABLED_MASKED_STRIDED: store ir<%2> to index 1 +; ENABLED_MASKED_STRIDED: Cost of 27 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%arrayidx2>, vp<[[VP8]]> +; ENABLED_MASKED_STRIDED: store ir<%0> to index 0 +; ENABLED_MASKED_STRIDED: store ir<%2> to index 1 ; entry: %cmp15 = icmp sgt i32 %numPoints, 0 @@ -153,19 +152,17 @@ for.end: define void @test(ptr noalias nocapture %points, ptr noalias nocapture readonly %x, ptr noalias nocapture readnone %y) { ; DISABLED_MASKED_STRIDED-LABEL: 'test' ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, ptr %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, ptr %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, ptr %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, ptr %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: Cost of 2 for VF 2: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: Cost of 4 for VF 4: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: Cost of 8 for VF 8: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: Cost of 16 for VF 16: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, ptr %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, ptr %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, ptr %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, ptr %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: Cost of 2 for VF 2: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: Cost of 4 for VF 4: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: Cost of 8 for VF 8: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: Cost of 16 for VF 16: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll index 5e67bd57754e4..986d7b7104d88 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %valB, ptr %out, align 4" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i32 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i32 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 @@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 +; SSE2: Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE2: Cost of 5 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE2: Cost of 11 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE2: Cost of 22 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4 ; ; SSE42-LABEL: 'test' ; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 +; SSE42: Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE42: Cost of 4 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE42: Cost of 8 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE42: Cost of 16 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4 ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, ptr %out, align 4 +; AVX1: Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4 +; AVX1: Cost of 4 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4 +; AVX1: Cost of 8 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4 +; AVX1: Cost of 17 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4 +; AVX1: Cost of 34 for VF 32: profitable to scalarize store i32 %valB, ptr %out, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, ptr %out, align 4 +; AVX2: Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4 +; AVX2: Cost of 4 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4 +; AVX2: Cost of 8 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4 +; AVX2: Cost of 17 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4 +; AVX2: Cost of 34 for VF 32: profitable to scalarize store i32 %valB, ptr %out, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 10 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 72 for VF 64 For instruction: store i32 %valB, ptr %out, align 4 +; AVX512: Cost of 5 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 10 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out>, ir<%valB>, ir<%canStore> +; AVX512: Cost of 18 for VF 16: WIDEN store ir<%out>, ir<%valB>, ir<%canStore> +; AVX512: Cost of 36 for VF 32: WIDEN store ir<%out>, ir<%valB>, ir<%canStore> +; AVX512: Cost of 72 for VF 64: WIDEN store ir<%out>, ir<%valB>, ir<%canStore> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll index faa2aa43d4934..56d3f973b3177 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll @@ -1,8 +1,8 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %valB, ptr %out, align 8" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i64 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i64 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2,AVX2-NOFAST +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 @@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 +; SSE2: Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE2: Cost of 5 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE2: Cost of 10 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE2: Cost of 20 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8 ; ; SSE42-LABEL: 'test' ; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 +; SSE42: Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE42: Cost of 4 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE42: Cost of 8 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE42: Cost of 16 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8 ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, ptr %out, align 8 +; AVX1: Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8 +; AVX1: Cost of 4 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8 +; AVX1: Cost of 9 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8 +; AVX1: Cost of 18 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8 +; AVX1: Cost of 36 for VF 32: profitable to scalarize store i64 %valB, ptr %out, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2-NOFAST: LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, ptr %out, align 8 +; AVX2: Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8 +; AVX2: Cost of 4 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8 +; AVX2: Cost of 9 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8 +; AVX2: Cost of 18 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8 +; AVX2: Cost of 36 for VF 32: profitable to scalarize store i64 %valB, ptr %out, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: store i64 %valB, ptr %out, align 8 +; AVX512: Cost of 5 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 11 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out>, ir<%valB>, ir<%canStore> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out>, ir<%valB>, ir<%canStore> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out>, ir<%valB>, ir<%canStore> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out>, ir<%valB>, ir<%canStore> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll index 1d51a32a520a9..89df198fc74a9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %valB, ptr %out, align 2" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i16 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i16 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 @@ -17,35 +17,35 @@ target triple = "x86_64-unknown-linux-gnu" define void @test(ptr %C) { ; SSE-LABEL: 'test' ; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2 -; SSE: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2 -; SSE: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, ptr %out, align 2 -; SSE: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, ptr %out, align 2 -; SSE: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, ptr %out, align 2 +; SSE: Cost of 2 for VF 2: profitable to scalarize store i16 %valB, ptr %out, align 2 +; SSE: Cost of 4 for VF 4: profitable to scalarize store i16 %valB, ptr %out, align 2 +; SSE: Cost of 8 for VF 8: profitable to scalarize store i16 %valB, ptr %out, align 2 +; SSE: Cost of 16 for VF 16: profitable to scalarize store i16 %valB, ptr %out, align 2 ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2 -; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2 -; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, ptr %out, align 2 -; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, ptr %out, align 2 -; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, ptr %out, align 2 -; AVX1: LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, ptr %out, align 2 +; AVX1: Cost of 2 for VF 2: profitable to scalarize store i16 %valB, ptr %out, align 2 +; AVX1: Cost of 4 for VF 4: profitable to scalarize store i16 %valB, ptr %out, align 2 +; AVX1: Cost of 8 for VF 8: profitable to scalarize store i16 %valB, ptr %out, align 2 +; AVX1: Cost of 16 for VF 16: profitable to scalarize store i16 %valB, ptr %out, align 2 +; AVX1: Cost of 33 for VF 32: profitable to scalarize store i16 %valB, ptr %out, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2 -; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2 -; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, ptr %out, align 2 -; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, ptr %out, align 2 -; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, ptr %out, align 2 -; AVX2: LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, ptr %out, align 2 +; AVX2: Cost of 2 for VF 2: profitable to scalarize store i16 %valB, ptr %out, align 2 +; AVX2: Cost of 4 for VF 4: profitable to scalarize store i16 %valB, ptr %out, align 2 +; AVX2: Cost of 8 for VF 8: profitable to scalarize store i16 %valB, ptr %out, align 2 +; AVX2: Cost of 16 for VF 16: profitable to scalarize store i16 %valB, ptr %out, align 2 +; AVX2: Cost of 33 for VF 32: profitable to scalarize store i16 %valB, ptr %out, align 2 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 1 for VF 32 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 2 for VF 64 For instruction: store i16 %valB, ptr %out, align 2 +; AVX512: Cost of 2 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 2 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 1 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 1 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 1 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 2 for VF 64: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll index f011d06d319bb..0111cc162b4de 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %valB, ptr %out, align 4" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i32 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i32 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 @@ -17,42 +17,42 @@ target triple = "x86_64-unknown-linux-gnu" define void @test(ptr %C) { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 11 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 22 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 +; SSE2: Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE2: Cost of 5 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE2: Cost of 11 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE2: Cost of 22 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4 ; ; SSE42-LABEL: 'test' ; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 +; SSE42: Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE42: Cost of 4 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE42: Cost of 8 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4 +; SSE42: Cost of 16 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4 ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 9 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 32 for VF 32 For instruction: store i32 %valB, ptr %out, align 4 +; AVX1: Cost of 9 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX1: Cost of 8 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX1: Cost of 8 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX1: Cost of 16 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX1: Cost of 32 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 9 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 32 for VF 32 For instruction: store i32 %valB, ptr %out, align 4 +; AVX2: Cost of 9 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX2: Cost of 8 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX2: Cost of 8 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX2: Cost of 16 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX2: Cost of 32 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 2 for VF 32 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 4 for VF 64 For instruction: store i32 %valB, ptr %out, align 4 +; AVX512: Cost of 2 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 1 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 1 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 1 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 2 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 4 for VF 64: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll index c004b16ae207d..afa821586bb1c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %valB, ptr %out, align 8" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i64 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i64 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 @@ -17,42 +17,42 @@ target triple = "x86_64-unknown-linux-gnu" define void @test(ptr %C) { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 +; SSE2: Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE2: Cost of 5 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE2: Cost of 10 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE2: Cost of 20 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8 ; ; SSE42-LABEL: 'test' ; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 +; SSE42: Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE42: Cost of 4 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE42: Cost of 8 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8 +; SSE42: Cost of 16 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8 ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 32 For instruction: store i64 %valB, ptr %out, align 8 +; AVX1: Cost of 8 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX1: Cost of 8 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX1: Cost of 16 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX1: Cost of 32 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX1: Cost of 64 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 64 for VF 32 For instruction: store i64 %valB, ptr %out, align 8 +; AVX2: Cost of 8 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX2: Cost of 8 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX2: Cost of 16 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX2: Cost of 32 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX2: Cost of 64 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 2 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 4 for VF 32 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 8 for VF 64 For instruction: store i64 %valB, ptr %out, align 8 +; AVX512: Cost of 1 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 1 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 1 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 2 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 4 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) +; AVX512: Cost of 8 for VF 64: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}}) ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll index 8bbe624849783..862a572643895 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %valB, ptr %out, align 1" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i8 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i8 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 @@ -17,42 +17,42 @@ target triple = "x86_64-unknown-linux-gnu" define void @test(ptr %C) { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1 -; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1 -; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i8 %valB, ptr %out, align 1 -; SSE2: LV: Found an estimated cost of 11 for VF 8 For instruction: store i8 %valB, ptr %out, align 1 -; SSE2: LV: Found an estimated cost of 23 for VF 16 For instruction: store i8 %valB, ptr %out, align 1 +; SSE2: Cost of 2 for VF 2: profitable to scalarize store i8 %valB, ptr %out, align 1 +; SSE2: Cost of 5 for VF 4: profitable to scalarize store i8 %valB, ptr %out, align 1 +; SSE2: Cost of 11 for VF 8: profitable to scalarize store i8 %valB, ptr %out, align 1 +; SSE2: Cost of 23 for VF 16: profitable to scalarize store i8 %valB, ptr %out, align 1 ; ; SSE42-LABEL: 'test' ; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1 -; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1 -; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, ptr %out, align 1 -; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, ptr %out, align 1 -; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, ptr %out, align 1 +; SSE42: Cost of 2 for VF 2: profitable to scalarize store i8 %valB, ptr %out, align 1 +; SSE42: Cost of 4 for VF 4: profitable to scalarize store i8 %valB, ptr %out, align 1 +; SSE42: Cost of 8 for VF 8: profitable to scalarize store i8 %valB, ptr %out, align 1 +; SSE42: Cost of 16 for VF 16: profitable to scalarize store i8 %valB, ptr %out, align 1 ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1 -; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1 -; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, ptr %out, align 1 -; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, ptr %out, align 1 -; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, ptr %out, align 1 -; AVX1: LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, ptr %out, align 1 +; AVX1: Cost of 2 for VF 2: profitable to scalarize store i8 %valB, ptr %out, align 1 +; AVX1: Cost of 4 for VF 4: profitable to scalarize store i8 %valB, ptr %out, align 1 +; AVX1: Cost of 8 for VF 8: profitable to scalarize store i8 %valB, ptr %out, align 1 +; AVX1: Cost of 16 for VF 16: profitable to scalarize store i8 %valB, ptr %out, align 1 +; AVX1: Cost of 32 for VF 32: profitable to scalarize store i8 %valB, ptr %out, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1 -; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1 -; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, ptr %out, align 1 -; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, ptr %out, align 1 -; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, ptr %out, align 1 -; AVX2: LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, ptr %out, align 1 +; AVX2: Cost of 2 for VF 2: profitable to scalarize store i8 %valB, ptr %out, align 1 +; AVX2: Cost of 4 for VF 4: profitable to scalarize store i8 %valB, ptr %out, align 1 +; AVX2: Cost of 8 for VF 8: profitable to scalarize store i8 %valB, ptr %out, align 1 +; AVX2: Cost of 16 for VF 16: profitable to scalarize store i8 %valB, ptr %out, align 1 +; AVX2: Cost of 32 for VF 32: profitable to scalarize store i8 %valB, ptr %out, align 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 1 for VF 32 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 1 for VF 64 For instruction: store i8 %valB, ptr %out, align 1 +; AVX512: Cost of 2 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> +; AVX512: Cost of 2 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> +; AVX512: Cost of 2 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> +; AVX512: Cost of 1 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> +; AVX512: Cost of 1 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> +; AVX512: Cost of 1 for VF 64: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll index bd6b16831d09c..aa89dbbcd72ac 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %valB, ptr %out, align 2" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i16 %valB, ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (WIDEN store|REPLICATE store ir<%valB>)" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 @@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2 -; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i16 %valB, ptr %out, align 2 -; SSE2: LV: Found an estimated cost of 56 for VF 4 For instruction: store i16 %valB, ptr %out, align 2 -; SSE2: LV: Found an estimated cost of 112 for VF 8 For instruction: store i16 %valB, ptr %out, align 2 -; SSE2: LV: Found an estimated cost of 224 for VF 16 For instruction: store i16 %valB, ptr %out, align 2 +; SSE2: Cost of 28 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 56 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 112 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 224 for VF 16: REPLICATE store ir<%valB>, ir<%out> ; ; SSE42-LABEL: 'test' ; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2 -; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %valB, ptr %out, align 2 -; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i16 %valB, ptr %out, align 2 -; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i16 %valB, ptr %out, align 2 -; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i16 %valB, ptr %out, align 2 +; SSE42: Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 52 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 104 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 208 for VF 16: REPLICATE store ir<%valB>, ir<%out> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %valB, ptr %out, align 2 -; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i16 %valB, ptr %out, align 2 -; AVX1: LV: Found an estimated cost of 106 for VF 8 For instruction: store i16 %valB, ptr %out, align 2 -; AVX1: LV: Found an estimated cost of 213 for VF 16 For instruction: store i16 %valB, ptr %out, align 2 -; AVX1: LV: Found an estimated cost of 426 for VF 32 For instruction: store i16 %valB, ptr %out, align 2 +; AVX1: Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 53 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 106 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 213 for VF 16: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 426 for VF 32: REPLICATE store ir<%valB>, ir<%out> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2 -; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, ptr %out, align 2 -; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, ptr %out, align 2 -; AVX2: LV: Found an estimated cost of 26 for VF 8 For instruction: store i16 %valB, ptr %out, align 2 -; AVX2: LV: Found an estimated cost of 53 for VF 16 For instruction: store i16 %valB, ptr %out, align 2 -; AVX2: LV: Found an estimated cost of 106 for VF 32 For instruction: store i16 %valB, ptr %out, align 2 +; AVX2: Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 26 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 53 for VF 16: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 106 for VF 32: REPLICATE store ir<%valB>, ir<%out> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 111 for VF 32 For instruction: store i16 %valB, ptr %out, align 2 -; AVX512: LV: Found an estimated cost of 222 for VF 64 For instruction: store i16 %valB, ptr %out, align 2 +; AVX512: Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 27 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 55 for VF 16: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 111 for VF 32: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 222 for VF 64: REPLICATE store ir<%valB>, ir<%out> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll index de76eb0782c0d..816ddfbadc4d1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %valB, ptr %out, align 4" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i32 %valB, ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (WIDEN store|REPLICATE store ir<%valB>)" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 @@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 118 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; SSE2: LV: Found an estimated cost of 236 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 +; SSE2: Cost of 29 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 59 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 118 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 236 for VF 16: REPLICATE store ir<%valB>, ir<%out> ; ; SSE42-LABEL: 'test' ; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 +; SSE42: Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 52 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 104 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 208 for VF 16: REPLICATE store ir<%valB>, ir<%out> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 107 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 214 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 -; AVX1: LV: Found an estimated cost of 428 for VF 32 For instruction: store i32 %valB, ptr %out, align 4 +; AVX1: Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 53 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 107 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 214 for VF 16: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 428 for VF 32: REPLICATE store ir<%valB>, ir<%out> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 27 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 54 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 -; AVX2: LV: Found an estimated cost of 108 for VF 32 For instruction: store i32 %valB, ptr %out, align 4 +; AVX2: Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 27 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 54 for VF 16: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 108 for VF 32: REPLICATE store ir<%valB>, ir<%out> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, ptr %out, align 4 -; AVX512: LV: Found an estimated cost of 72 for VF 64 For instruction: store i32 %valB, ptr %out, align 4 +; AVX512: Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out>, ir<%valB> +; AVX512: Cost of 18 for VF 16: WIDEN store ir<%out>, ir<%valB> +; AVX512: Cost of 36 for VF 32: WIDEN store ir<%out>, ir<%valB> +; AVX512: Cost of 72 for VF 64: WIDEN store ir<%out>, ir<%valB> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll index c69711d1b71d5..64c41c2a31311 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %valB, ptr %out, align 8" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i64 %valB, ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (WIDEN store|REPLICATE store ir<%valB>)" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 @@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 58 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 116 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; SSE2: LV: Found an estimated cost of 232 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 +; SSE2: Cost of 29 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 58 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 116 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 232 for VF 16: REPLICATE store ir<%valB>, ir<%out> ; ; SSE42-LABEL: 'test' ; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 +; SSE42: Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 52 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 104 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 208 for VF 16: REPLICATE store ir<%valB>, ir<%out> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 108 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 -; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: store i64 %valB, ptr %out, align 8 +; AVX1: Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 54 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 108 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 216 for VF 16: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 432 for VF 32: REPLICATE store ir<%valB>, ir<%out> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 -; AVX2: LV: Found an estimated cost of 112 for VF 32 For instruction: store i64 %valB, ptr %out, align 8 +; AVX2: Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 14 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 28 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 56 for VF 16: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 112 for VF 32: REPLICATE store ir<%valB>, ir<%out> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, ptr %out, align 8 -; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: store i64 %valB, ptr %out, align 8 +; AVX512: Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 14 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out>, ir<%valB> +; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out>, ir<%valB> +; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out>, ir<%valB> +; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out>, ir<%valB> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll index 4f62383d67927..1be3a62186e16 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %valB, ptr %out, align 1" +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i8 %valB, ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (WIDEN store|REPLICATE store ir<%valB>)" ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 @@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; SSE2-LABEL: 'test' ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1 -; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: store i8 %valB, ptr %out, align 1 -; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction: store i8 %valB, ptr %out, align 1 -; SSE2: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %valB, ptr %out, align 1 -; SSE2: LV: Found an estimated cost of 239 for VF 16 For instruction: store i8 %valB, ptr %out, align 1 +; SSE2: Cost of 29 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 59 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 119 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; SSE2: Cost of 239 for VF 16: REPLICATE store ir<%valB>, ir<%out> ; ; SSE42-LABEL: 'test' ; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1 -; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %valB, ptr %out, align 1 -; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i8 %valB, ptr %out, align 1 -; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i8 %valB, ptr %out, align 1 -; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i8 %valB, ptr %out, align 1 +; SSE42: Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 52 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 104 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; SSE42: Cost of 208 for VF 16: REPLICATE store ir<%valB>, ir<%out> ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %valB, ptr %out, align 1 -; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %valB, ptr %out, align 1 -; AVX1: LV: Found an estimated cost of 106 for VF 8 For instruction: store i8 %valB, ptr %out, align 1 -; AVX1: LV: Found an estimated cost of 212 for VF 16 For instruction: store i8 %valB, ptr %out, align 1 -; AVX1: LV: Found an estimated cost of 425 for VF 32 For instruction: store i8 %valB, ptr %out, align 1 +; AVX1: Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 53 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 106 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 212 for VF 16: REPLICATE store ir<%valB>, ir<%out> +; AVX1: Cost of 425 for VF 32: REPLICATE store ir<%valB>, ir<%out> ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1 -; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, ptr %out, align 1 -; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, ptr %out, align 1 -; AVX2: LV: Found an estimated cost of 26 for VF 8 For instruction: store i8 %valB, ptr %out, align 1 -; AVX2: LV: Found an estimated cost of 52 for VF 16 For instruction: store i8 %valB, ptr %out, align 1 -; AVX2: LV: Found an estimated cost of 105 for VF 32 For instruction: store i8 %valB, ptr %out, align 1 +; AVX2: Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 26 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 52 for VF 16: REPLICATE store ir<%valB>, ir<%out> +; AVX2: Cost of 105 for VF 32: REPLICATE store ir<%valB>, ir<%out> ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 54 for VF 16 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 109 for VF 32 For instruction: store i8 %valB, ptr %out, align 1 -; AVX512: LV: Found an estimated cost of 219 for VF 64 For instruction: store i8 %valB, ptr %out, align 1 +; AVX512: Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 27 for VF 8: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 54 for VF 16: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 109 for VF 32: REPLICATE store ir<%valB>, ir<%out> +; AVX512: Cost of 219 for VF 64: REPLICATE store ir<%valB>, ir<%out> ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll index 46150a6955566..3e1c0d47b5f57 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll @@ -10,11 +10,11 @@ target triple = "x86_64-unknown-linux-gnu" define void @load_i16_stride2() { ;CHECK-LABEL: load_i16_stride2 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 32 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 3 for VF 32: INTERLEAVE-GROUP with factor 2 at %1, entry: br label %for.body @@ -36,11 +36,11 @@ for.end: ; preds = %for.body define void @load_i16_stride3() { ;CHECK-LABEL: load_i16_stride3 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 5 for VF 32 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 5 for VF 32: INTERLEAVE-GROUP with factor 3 at %1, entry: br label %for.body @@ -62,11 +62,11 @@ for.end: ; preds = %for.body define void @load_i16_stride4() { ;CHECK-LABEL: load_i16_stride4 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 8 for VF 32 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 4 at %1, entry: br label %for.body @@ -88,11 +88,11 @@ for.end: ; preds = %for.body define void @load_i16_stride5() { ;CHECK-LABEL: load_i16_stride5 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 5 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 10 for VF 32 For instruction: %1 = load +;CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 10 for VF 32: INTERLEAVE-GROUP with factor 5 at %1, entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll index c88bbffca8213..2aa6686a5dcc3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll @@ -10,10 +10,10 @@ target triple = "x86_64-unknown-linux-gnu" define void @load_int_stride2() { ;CHECK-LABEL: load_int_stride2 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 16 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 1 for VF 8: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at %1, entry: br label %for.body @@ -35,10 +35,10 @@ for.end: ; preds = %for.body define void @load_int_stride3() { ;CHECK-LABEL: load_int_stride3 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 3 at %1, entry: br label %for.body @@ -60,10 +60,10 @@ for.end: ; preds = %for.body define void @load_int_stride4() { ;CHECK-LABEL: load_int_stride4 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 5 for VF 16 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 4 at %1, entry: br label %for.body @@ -85,10 +85,10 @@ for.end: ; preds = %for.body define void @load_int_stride5() { ;CHECK-LABEL: load_int_stride5 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 6 for VF 16 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 5 at %1, entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll index 21f6128f6fca5..fece9ddd655d9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll @@ -10,9 +10,9 @@ target triple = "x86_64-unknown-linux-gnu" define void @load_i64_stride2() { ;CHECK-LABEL: load_i64_stride2 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %1, entry: br label %for.body @@ -34,9 +34,9 @@ for.end: ; preds = %for.body define void @load_i64_stride3() { ;CHECK-LABEL: load_i64_stride3 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 3 at %1, entry: br label %for.body @@ -58,9 +58,9 @@ for.end: ; preds = %for.body define void @load_i64_stride4() { ;CHECK-LABEL: load_i64_stride4 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 5 for VF 8 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 4 at %1, entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll index 022a1374f1bbe..ae0a2f793feb3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll @@ -10,12 +10,12 @@ target triple = "x86_64-unknown-linux-gnu" define void @load_i8_stride2() { ;CHECK-LABEL: load_i8_stride2 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 4 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 8 for VF 32 For instruction: %1 = load -;CHECK: Found an estimated cost of 20 for VF 64 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 1 for VF 8: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at %1, +;CHECK: Cost of 20 for VF 64: INTERLEAVE-GROUP with factor 2 at %1, entry: br label %for.body @@ -37,12 +37,12 @@ for.end: ; preds = %for.body define void @load_i8_stride3() { ;CHECK-LABEL: load_i8_stride3 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 4 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 13 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 16 for VF 32 For instruction: %1 = load -;CHECK: Found an estimated cost of 25 for VF 64 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at %1, +;CHECK: Cost of 25 for VF 64: INTERLEAVE-GROUP with factor 3 at %1, entry: br label %for.body @@ -64,12 +64,12 @@ for.end: ; preds = %for.body define void @load_i8_stride4() { ;CHECK-LABEL: load_i8_stride4 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 4 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 8 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 20 for VF 32 For instruction: %1 = load -;CHECK: Found an estimated cost of 59 for VF 64 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 4 at %1, +;CHECK: Cost of 59 for VF 64: INTERLEAVE-GROUP with factor 4 at %1, entry: br label %for.body @@ -91,12 +91,12 @@ for.end: ; preds = %for.body define void @load_i8_stride5() { ;CHECK-LABEL: load_i8_stride5 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 4 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 8 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 20 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 39 for VF 32 For instruction: %1 = load -;CHECK: Found an estimated cost of 78 for VF 64 For instruction: %1 = load +;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 20 for VF 16: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 39 for VF 32: INTERLEAVE-GROUP with factor 5 at %1, +;CHECK: Cost of 78 for VF 64: INTERLEAVE-GROUP with factor 5 at %1, entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll index 3c39a1d4e2463..83955f1a188bd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll @@ -12,7 +12,7 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) ; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 ; CHECK: Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<%0> -; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>, vp<%0> ; CHECK: Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4> ; CHECK: Cost of 0 for VF 2: vp<%5> = vector-pointer inbounds ir<%g.src> ; CHECK: Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5> @@ -25,12 +25,20 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) ; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1> ; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%2> ; CHECK: Cost of 0 for VF 2: vector loop backedge +; CHECK: Cost of 0 for VF 2: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%2>, middle.block ], [ ir<0>, ir-bb ] +; CHECK: Cost of 0 for VF 2: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] (extra operand: vp<%bc.resume.val> from scalar.ph) +; CHECK: Cost of 0 for VF 2: IR %g.src = getelementptr inbounds i64, ptr %src, i64 %iv +; CHECK: Cost of 0 for VF 2: IR %l = load i64, ptr %g.src, align 8 +; CHECK: Cost of 0 for VF 2: IR %iv.4 = add nuw nsw i64 %iv, 4 +; CHECK: Cost of 0 for VF 2: IR %c = icmp ule i64 %l, 128 +; CHECK: Cost of 0 for VF 2: EMIT vp<%cmp.n> = icmp eq ir<32>, vp<%2> +; CHECK: Cost of 0 for VF 2: EMIT branch-on-cond vp<%cmp.n> ; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1 ; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] ; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 ; CHECK: Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<%0> -; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>, vp<%0> ; CHECK: Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4> ; CHECK: Cost of 0 for VF 4: vp<%5> = vector-pointer inbounds ir<%g.src> ; CHECK: Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5> @@ -43,9 +51,16 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) ; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1> ; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%2> ; CHECK: Cost of 0 for VF 4: vector loop backedge -; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1 -; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] -; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 +; CHECK: Cost of 0 for VF 4: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%2>, middle.block ], [ ir<0>, ir-bb ] +; CHECK: Cost of 0 for VF 4: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] (extra operand: vp<%bc.resume.val> from scalar.ph) +; CHECK: Cost of 0 for VF 4: IR %g.src = getelementptr inbounds i64, ptr %src, i64 %iv +; CHECK: Cost of 0 for VF 4: IR %l = load i64, ptr %g.src, align 8 +; CHECK: Cost of 0 for VF 4: IR %iv.4 = add nuw nsw i64 %iv, 4 +; CHECK: Cost of 0 for VF 4: IR %c = icmp ule i64 %l, 128 +; CHECK: Cost of 0 for VF 4: EMIT vp<%cmp.n> = icmp eq ir<32>, vp<%2> +; CHECK: Cost of 0 for VF 4: EMIT branch-on-cond vp<%cmp.n> +; CHECK: Cost of 0 for VF 4: EMIT vp<%cmp.n> = icmp eq ir<32>, vp<%2> +; CHECK: Cost of 0 for VF 4: EMIT branch-on-cond vp<%cmp.n> ; entry: br label %loop.header