diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/binop-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/binop-costs.ll
index ff1dee41e62bf..8e4c6d470c9be 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/binop-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/binop-costs.ll
@@ -17,12 +17,6 @@ define void @udiv_rhs_opt_cost(ptr %dst) #0 {
 ; CHECK:  Cost of 0 for VF vscale x 2: IR %div = udiv i8 %iv.trunc, 3
 ; CHECK:  Cost of 5 for VF vscale x 4: CLONE ir<%div> = udiv vp<[[VP7]]>, ir<3>
 ; CHECK:  Cost of 0 for VF vscale x 4: IR %div = udiv i8 %iv.trunc, 3
-; CHECK:  LV: Found an estimated cost of 5 for VF 1 For instruction: %div = udiv i8 %iv.trunc, 3
-; CHECK:  LV: Found an estimated cost of 5 for VF 2 For instruction: %div = udiv i8 %iv.trunc, 3
-; CHECK:  LV: Found an estimated cost of 5 for VF 4 For instruction: %div = udiv i8 %iv.trunc, 3
-; CHECK:  LV: Found an estimated cost of 5 for VF vscale x 1 For instruction: %div = udiv i8 %iv.trunc, 3
-; CHECK:  LV: Found an estimated cost of 5 for VF vscale x 2 For instruction: %div = udiv i8 %iv.trunc, 3
-; CHECK:  LV: Found an estimated cost of 5 for VF vscale x 4 For instruction: %div = udiv i8 %iv.trunc, 3
 ;
 entry:
   br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll
index a44a16455445c..bcd1c28318450 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll
@@ -8,6 +8,7 @@ target triple = "aarch64-unknown-linux-gnu"
 
 define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
 ; CHECK-COST-LABEL: LV: Checking a loop in 'zext_i8_i16'
+; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv = zext i8 %0 to i32
 ; CHECK-COST: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
 ; CHECK-COST: Cost of 1 for VF 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
 ; CHECK-COST: Cost of 1 for VF 8: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
@@ -16,7 +17,6 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur
 ; CHECK-COST: Cost of 1 for VF vscale x 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
 ; CHECK-COST: Cost of 1 for VF vscale x 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
 ; CHECK-COST: Cost of 0 for VF vscale x 8: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv = zext i8 %0 to i32
 ; CHECK-LABEL: define void @zext_i8_i16
 ; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
index 99139da67bb78..b0738cad80064 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
@@ -84,22 +84,26 @@ define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
 ; CHECK-SCALAR:      LV(REG): VF = 1
 ; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item
 ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
-; CHECK-LMUL1:       LV(REG): VF = vscale x 2
+; CHECK-LMUL1-LABEL: goo
+; CHECK-LMUL1:       LV(REG): VF = vscale x 1
 ; CHECK-LMUL1-NEXT:  LV(REG): Found max usage: 2 item
-; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
-; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
-; CHECK-LMUL2:       LV(REG): VF = vscale x 4
+; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
+; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 1 registers
+; CHECK-LMUL2-LABEL: goo
+; CHECK-LMUL2:       LV(REG): VF = vscale x 2
 ; CHECK-LMUL2-NEXT:  LV(REG): Found max usage: 2 item
-; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
-; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
-; CHECK-LMUL4:       LV(REG): VF = vscale x 8
+; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
+; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
+; CHECK-LMUL4-LABEL: goo
+; CHECK-LMUL4:       LV(REG): VF = vscale x 4
 ; CHECK-LMUL4-NEXT:  LV(REG): Found max usage: 2 item
-; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
-; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 8 registers
-; CHECK-LMUL8:       LV(REG): VF = vscale x 16
+; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
+; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-LMUL8-LABEL: goo
+; CHECK-LMUL8:       LV(REG): VF = vscale x 8
 ; CHECK-LMUL8-NEXT:  LV(REG): Found max usage: 2 item
-; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
-; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 16 registers
+; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
+; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 8 registers
 entry:
   %cmp3 = icmp sgt i32 %n, 0
   br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll
index 10d83f4ad125e..fe39700d1787c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll
@@ -3,8 +3,8 @@
 ; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
 ; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s
 
+; CHECK: Cost of 0 for VF vscale x 4: WIDEN-REDUCTION-PHI ir<%rdx> = phi
 ; CHECK: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, ir<%add>, ir<%rdx>, vp<%{{.+}}>)
-; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction:   %rdx = phi i32 [ %start, %entry ], [ %add, %loop ]
 
 define i32 @add(ptr %a, i64 %n, i32 %start) {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll
index d23c2272d9c0d..9f824d1a963eb 100644
--- a/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll
@@ -11,17 +11,17 @@ define hidden i32 @i32_mac_s8(ptr nocapture noundef readonly %a, ptr nocapture n
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv2 = sext i8 %1 to i32
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %mul = mul nsw i32 %conv2, %conv
 
-; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction:   %0 = load i8, ptr %arrayidx, align 1
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %conv = sext i8 %0 to i32
-; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction:   %1 = load i8, ptr %arrayidx1, align 1
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %conv2 = sext i8 %1 to i32
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %mul = mul nsw i32 %conv2, %conv
-
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %0 = load i8, ptr %arrayidx, align 1
-; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %conv = sext i8 %0 to i32
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load i8, ptr %arrayidx1, align 1
-; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %conv2 = sext i8 %1 to i32
-; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %mul = mul nsw i32 %conv2, %conv
+; CHECK: Cost of 3 for VF 2: WIDEN ir<%0> = load
+; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%0> to i32
+; CHECK: Cost of 3 for VF 2: WIDEN ir<%1> = load
+; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32
+; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv>
+
+; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load
+; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv> = sext ir<%0> to i32
+; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load
+; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32
+; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv>
 ; CHECK: LV: Selecting VF: 4.
 entry:
   %cmp7.not = icmp eq i32 %N, 0
@@ -55,17 +55,17 @@ define hidden i32 @i32_mac_s16(ptr nocapture noundef readonly %a, ptr nocapture
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv2 = sext i16 %1 to i32
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %mul = mul nsw i32 %conv2, %conv
 
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %0 = load i16, ptr %arrayidx, align 2
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %conv = sext i16 %0 to i32
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %1 = load i16, ptr %arrayidx1, align 2
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %conv2 = sext i16 %1 to i32
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %mul = mul nsw i32 %conv2, %conv
-
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %0 = load i16, ptr %arrayidx, align 2
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %conv = sext i16 %0 to i32
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load i16, ptr %arrayidx1, align 2
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %conv2 = sext i16 %1 to i32
-; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %mul = mul nsw i32 %conv2, %conv
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
+; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%0> to i32
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
+; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32
+; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv>
+
+; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load
+; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv> = sext ir<%0> to i32
+; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load
+; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32
+; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv>
 ; CHECK: LV: Selecting VF: 4.
 entry:
   %cmp7.not = icmp eq i32 %N, 0
@@ -99,11 +99,11 @@ define hidden i64 @i64_mac_s16(ptr nocapture noundef readonly %a, ptr nocapture
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv2 = sext i16 %1 to i64
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %mul = mul nsw i64 %conv2, %conv
 
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %0 = load i16, ptr %arrayidx, align 2
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %conv = sext i16 %0 to i64
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %1 = load i16, ptr %arrayidx1, align 2
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %conv2 = sext i16 %1 to i64
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %mul = mul nsw i64 %conv2, %conv
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
+; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%0> to i64
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
+; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv2> = sext ir<%1> to i64
+; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv>
 ; CHECK: LV: Selecting VF: 2.
 entry:
   %cmp7.not = icmp eq i32 %N, 0
@@ -136,10 +136,10 @@ define hidden i64 @i64_mac_s32(ptr nocapture noundef readonly %a, ptr nocapture
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %mul = mul i32 %1, %0
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %conv = sext i32 %mul to i64
 
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %0 = load i32, ptr %arrayidx, align 4
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %1 = load i32, ptr %arrayidx1, align 4
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %mul = mul i32 %1, %0
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %conv = sext i32 %mul to i64
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
+; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul ir<%1>, ir<%0>
+; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%mul> to i64
 ; CHECK: LV: Selecting VF: 2.
 entry:
   %cmp6.not = icmp eq i32 %N, 0
@@ -172,17 +172,17 @@ define hidden i32 @i32_mac_u8(ptr nocapture noundef readonly %a, ptr nocapture n
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv2 = zext i8 %1 to i32
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %mul = mul nuw nsw i32 %conv2, %conv
 
-; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction:   %0 = load i8, ptr %arrayidx, align 1
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %conv = zext i8 %0 to i32
-; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction:   %1 = load i8, ptr %arrayidx1, align 1
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %conv2 = zext i8 %1 to i32
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %mul = mul nuw nsw i32 %conv2, %conv
-
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %0 = load i8, ptr %arrayidx, align 1
-; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %conv = zext i8 %0 to i32
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load i8, ptr %arrayidx1, align 1
-; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %conv2 = zext i8 %1 to i32
-; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %mul = mul nuw nsw i32 %conv2, %conv
+; CHECK: Cost of 3 for VF 2: WIDEN ir<%0> = load
+; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i32
+; CHECK: Cost of 3 for VF 2: WIDEN ir<%1> = load
+; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32
+; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv>
+
+; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load
+; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i32
+; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load
+; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32
+; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv>
 ; CHECK: LV: Selecting VF: 4.
 entry:
   %cmp7.not = icmp eq i32 %N, 0
@@ -216,17 +216,17 @@ define hidden i32 @i32_mac_u16(ptr nocapture noundef readonly %a, ptr nocapture
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv2 = zext i16 %1 to i32
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %mul = mul nuw nsw i32 %conv2, %conv
 
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %0 = load i16, ptr %arrayidx, align 2
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %conv = zext i16 %0 to i32
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %1 = load i16, ptr %arrayidx1, align 2
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %conv2 = zext i16 %1 to i32
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %mul = mul nuw nsw i32 %conv2, %conv
-
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %0 = load i16, ptr %arrayidx, align 2
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %conv = zext i16 %0 to i32
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load i16, ptr %arrayidx1, align 2
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %conv2 = zext i16 %1 to i32
-; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %mul = mul nuw nsw i32 %conv2, %conv
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
+; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i32
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
+; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32
+; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv>
+
+; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load
+; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i32
+; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load
+; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32
+; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv>
 ; CHECK: LV: Selecting VF: 4.
 entry:
   %cmp7.not = icmp eq i32 %N, 0
@@ -260,11 +260,11 @@ define hidden i64 @i64_mac_u16(ptr nocapture noundef readonly %a, ptr nocapture
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv2 = zext i16 %1 to i64
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %mul = mul nuw nsw i64 %conv2, %conv
 
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %0 = load i16, ptr %arrayidx, align 2
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %conv = zext i16 %0 to i64
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %1 = load i16, ptr %arrayidx1, align 2
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %conv2 = zext i16 %1 to i64
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %mul = mul nuw nsw i64 %conv2, %conv
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
+; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i64
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
+; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv2> = zext ir<%1> to i64
+; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv>
 ; CHECK: LV: Selecting VF: 2.
 entry:
   %cmp8.not = icmp eq i32 %N, 0
@@ -297,10 +297,10 @@ define hidden i64 @i64_mac_u32(ptr nocapture noundef readonly %a, ptr nocapture
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %mul = mul i32 %1, %0
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %conv = zext i32 %mul to i64
 
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %0 = load i32, ptr %arrayidx, align 4
-; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction:   %1 = load i32, ptr %arrayidx1, align 4
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %mul = mul i32 %1, %0
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %conv = zext i32 %mul to i64
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
+; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
+; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul ir<%1>, ir<%0>
+; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%mul> to i64
 ; CHECK: LV: Selecting VF: 2.
 entry:
   %cmp6.not = icmp eq i32 %N, 0
diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll
index 54cbab78b1e29..dd380d8ad2818 100644
--- a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll
@@ -19,12 +19,22 @@ target triple = "wasm32-unknown-wasi"
 %struct.FourFloats = type { float, float, float, float }
 
 ; CHECK-LABEL: two_ints_same_op
-; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at %10
-; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %10
 ; CHECK: LV: Scalar loop costs: 12.
-; CHECK: LV: Vector loop of width 2 costs: 13.
-; CHECK: LV: Vector loop of width 4 costs: 6.
-; CHECK: LV: Selecting VF: 4
+; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK: Cost for VF 2: 27 (Estimated cost per lane: 13.5)
+; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK: Cost for VF 4: 24 (Estimated cost per lane: 6.0)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @two_ints_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -54,12 +64,22 @@ define hidden void @two_ints_same_op(ptr noalias nocapture noundef writeonly %0,
 }
 
 ; CHECK-LABEL: two_ints_vary_op
-; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at %10
-; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %10
 ; CHECK: LV: Scalar loop costs: 12.
-; CHECK: LV: Vector loop of width 2 costs: 13.
-; CHECK: LV: Vector loop of width 4 costs: 6.
-; CHECK: LV: Selecting VF: 4
+; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK: Cost for VF 2: 27 (Estimated cost per lane: 13.5)
+; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK: Cost for VF 4: 24 (Estimated cost per lane: 6.0)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @two_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -90,15 +110,15 @@ define hidden void @two_ints_vary_op(ptr noalias nocapture noundef writeonly %0,
 
 ; CHECK-LABEL: three_ints
 ; CHECK: LV: Scalar loop costs: 16.
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i32, ptr %9
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %12 = load i32, ptr %11
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %25, ptr %26
-; CHECK: LV: Vector loop of width 2 costs: 30.
-; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %10 = load i32, ptr %9
-; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %12 = load i32, ptr %11
-; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: store i32 %25, ptr %26
-; CHECK: LV: Vector loop of width 4 costs: 28.
-; CHECK: LV: Selecting VF: 1
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 2: 61 (Estimated cost per lane: 30.5)
+; CHECK: Cost of 12 for VF 4: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 12 for VF 4: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 12 for VF 4: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 4: 115 (Estimated cost per lane: 28.8)
+; CHECK: LV: Selecting VF: 1.
 define hidden void @three_ints(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -136,19 +156,19 @@ define hidden void @three_ints(ptr noalias nocapture noundef writeonly %0, ptr n
 
 ; CHECK-LABEL: three_shorts
 ; CHECK: LV: Scalar loop costs: 16.
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %25
-; CHECK: LV: Vector loop of width 2 costs: 30.
-; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: store i16 %25
-; CHECK: LV: Vector loop of width 4 costs: 28.
-; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: store i16 %25
-; CHECK: LV: Vector loop of width 8 costs: 27.
-; CHECK: LV: Selecting VF: 1
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 2: 61 (Estimated cost per lane: 30.5)
+; CHECK: Cost of 12 for VF 4: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 12 for VF 4: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 12 for VF 4: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 4: 115 (Estimated cost per lane: 28.8)
+; CHECK: Cost of 24 for VF 8: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 24 for VF 8: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 24 for VF 8: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 8: 223 (Estimated cost per lane: 27.9)
+; CHECK: LV: Selecting VF: 1.
 define hidden void @three_shorts(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -185,24 +205,41 @@ define hidden void @three_shorts(ptr noalias nocapture noundef writeonly %0, ptr
 }
 
 ; CHECK-LABEL: four_shorts_same_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4
 ; CHECK: LV: Scalar loop costs: 20.
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: store i16
-; CHECK: LV: Vector loop of width 2 costs: 31.
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i16
-; CHECK: LV: Vector loop of width 4 costs: 15.
-; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16
-; CHECK: LV: Vector loop of width 8 costs: 26
-; CHECK: LV: Selecting VF: 4
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 2: 62 (Estimated cost per lane: 31.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5)
+; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 8: 212 (Estimated cost per lane: 26.5)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @four_shorts_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -246,23 +283,41 @@ define hidden void @four_shorts_same_op(ptr noalias nocapture noundef writeonly
 }
 
 ; CHECK-LABEL: four_shorts_split_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4
 ; CHECK: LV: Scalar loop costs: 20.
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: store i16
-; CHECK: LV: Vector loop of width 2 costs: 31.
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i16 %31
-; CHECK: LV: Vector loop of width 4 costs: 15.
-; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 %31
-; CHECK: LV: Vector loop of width 8 costs: 26.
-; CHECK: LV: Selecting VF: 4
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 2: 62 (Estimated cost per lane: 31.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5)
+; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 8: 212 (Estimated cost per lane: 26.5)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @four_shorts_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -306,23 +361,41 @@ define hidden void @four_shorts_split_op(ptr noalias nocapture noundef writeonly
 }
 
 ; CHECK-LABEL: four_shorts_interleave_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4
 ; CHECK: LV: Scalar loop costs: 20.
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: store i16
-; CHECK: LV: Vector loop of width 2 costs: 31.
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i16
-; CHECK: LV: Vector loop of width 4 costs: 15.
-; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %10 = load i16
-; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %12 = load i16
-; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16
-; CHECK: LV: Vector loop of width 8 costs: 26.
-; CHECK: LV: Selecting VF: 4
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 2: 62 (Estimated cost per lane: 31.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5)
+; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 8: 212 (Estimated cost per lane: 26.5)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @four_shorts_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -366,17 +439,51 @@ define hidden void @four_shorts_interleave_op(ptr noalias nocapture noundef writ
 }
 
 ; CHECK-LABEL: five_shorts
-; CHECK: Cost of 42 for VF 4: INTERLEAVE-GROUP with factor 5
-; CHECK: Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5
-; CHECK: LV: Found an estimated cost of 84 for VF 8 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 84 for VF 8 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %37
-; CHECK: LV: Vector loop of width 8 costs: 32
-; CHECK: LV: Found an estimated cost of 168 for VF 16 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 168 for VF 16 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %37
-; CHECK: LV: Vector loop of width 16 costs: 32
-; CHECK: LV: Selecting VF: 1
+; CHECK: LV: Scalar loop costs: 24.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 2: 99 (Estimated cost per lane: 49.5)
+; CHECK: Cost of 42 for VF 4: INTERLEAVE-GROUP with factor 5 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK: Cost of 42 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK: Cost for VF 4: 135 (Estimated cost per lane: 33.8)
+; CHECK: Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK: Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK: Cost for VF 8: 261 (Estimated cost per lane: 32.6)
+; CHECK: Cost of 168 for VF 16: INTERLEAVE-GROUP with factor 5 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK: Cost of 168 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK: Cost for VF 16: 513 (Estimated cost per lane: 32.1)
+; CHECK: LV: Selecting VF: 1.
 define hidden void @five_shorts(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -427,26 +534,32 @@ define hidden void @five_shorts(ptr noalias nocapture noundef writeonly %0, ptr
 }
 
 ; CHECK-LABEL: two_bytes_same_op
-; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2
 ; CHECK: LV: Scalar loop costs: 12.
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %13 = mul i8
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %13
-; CHECK: LV: Vector loop of width 2 costs: 26.
-; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %13 = mul i8
-; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8
-; CHECK: LV: Vector loop of width 4 costs: 15.
-; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %13 = mul i8
-; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: store i8
-; CHECK: LV: Vector loop of width 8 costs: 4.
-; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 4 for VF 16 For instruction: %13 = mul i8
-; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: store i8
-; CHECK: LV: Vector loop of width 16 costs: 1.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 2: 52 (Estimated cost per lane: 26.0)
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK: Cost for VF 4: 61 (Estimated cost per lane: 15.2)
+; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK: Cost for VF 8: 33 (Estimated cost per lane: 4.1)
+; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK: Cost for VF 16: 30 (Estimated cost per lane: 1.9)
 ; CHECK: LV: Selecting VF: 16.
 define hidden void @two_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
@@ -477,26 +590,32 @@ define hidden void @two_bytes_same_op(ptr noalias nocapture noundef writeonly %0
 }
 
 ; CHECK-LABEL: two_bytes_vary_op
-; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2
 ; CHECK: LV: Scalar loop costs: 12.
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %13 = mul i8
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %13
-; CHECK: LV: Vector loop of width 2 costs: 23.
-; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %13 = mul i8
-; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8
-; CHECK: LV: Vector loop of width 4 costs: 12.
-; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %13 = mul i8
-; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: store i8
-; CHECK: LV: Vector loop of width 8 costs: 3.
-; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 4 for VF 16 For instruction: %13 = mul i8
-; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: store i8 %19
-; CHECK: LV: Vector loop of width 16 costs: 1.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 2: 47 (Estimated cost per lane: 23.5)
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK: Cost for VF 4: 50 (Estimated cost per lane: 12.5)
+; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK: Cost for VF 8: 30 (Estimated cost per lane: 3.8)
+; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK: Cost for VF 16: 27 (Estimated cost per lane: 1.7)
 ; CHECK: LV: Selecting VF: 16.
 define hidden void @two_bytes_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
@@ -528,16 +647,22 @@ define hidden void @two_bytes_vary_op(ptr noalias nocapture noundef writeonly %0
 
 ; CHECK-LABEL: three_bytes_same_op
 ; CHECK: LV: Scalar loop costs: 16.
-; CHECK: LV: Vector loop of width 2 costs: 30.
-; CHECK: LV: Vector loop of width 4 costs: 28.
-; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %10 = load i8, ptr %9
-; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %12 = load i8, ptr %11
-; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: store i8 %25
-; CHECK: LV: Vector loop of width 8 costs: 27.
-; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: %10 = load i8, ptr %9
-; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: %12 = load i8, ptr %11
-; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: store i8 %25
-; CHECK: LV: Vector loop of width 16 costs: 27.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 2: 61 (Estimated cost per lane: 30.5)
+; CHECK: Cost of 12 for VF 4: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 12 for VF 4: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 12 for VF 4: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 4: 115 (Estimated cost per lane: 28.8)
+; CHECK: Cost of 24 for VF 8: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 24 for VF 8: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 24 for VF 8: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 8: 223 (Estimated cost per lane: 27.9)
+; CHECK: Cost of 48 for VF 16: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 48 for VF 16: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 48 for VF 16: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 16: 439 (Estimated cost per lane: 27.4)
 ; CHECK: LV: Selecting VF: 1.
 define hidden void @three_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
@@ -576,16 +701,22 @@ define hidden void @three_bytes_same_op(ptr noalias nocapture noundef writeonly
 
 ; CHECK-LABEL: three_bytes_interleave_op
 ; CHECK: LV: Scalar loop costs: 16.
-; CHECK: LV: Vector loop of width 2 costs: 30.
-; CHECK: LV: Vector loop of width 4 costs: 28.
-; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %10 = load i8, ptr %9
-; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: %12 = load i8, ptr %11
-; CHECK: LV: Found an estimated cost of 24 for VF 8 For instruction: store i8 %25
-; CHECK: LV: Vector loop of width 8 costs: 27.
-; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: %10 = load i8, ptr %9
-; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: %12 = load i8, ptr %11
-; CHECK: LV: Found an estimated cost of 48 for VF 16 For instruction: store i8 %25
-; CHECK: LV: Vector loop of width 16 costs: 27.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 2: 61 (Estimated cost per lane: 30.5)
+; CHECK: Cost of 12 for VF 4: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 12 for VF 4: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 12 for VF 4: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 4: 115 (Estimated cost per lane: 28.8)
+; CHECK: Cost of 24 for VF 8: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 24 for VF 8: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 24 for VF 8: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 8: 223 (Estimated cost per lane: 27.9)
+; CHECK: Cost of 48 for VF 16: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 48 for VF 16: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 48 for VF 16: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 16: 439 (Estimated cost per lane: 27.4)
 ; CHECK: LV: Selecting VF: 1.
 define hidden void @three_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
@@ -623,23 +754,44 @@ define hidden void @three_bytes_interleave_op(ptr noalias nocapture noundef writ
 }
 
 ; CHECK-LABEL: four_bytes_same_op
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4
 ; CHECK: LV: Scalar loop costs: 20.
-; CHECK: LV: Vector loop of width 2 costs: 40.
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i8
-; CHECK: LV: Vector loop of width 4 costs: 15.
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: store i8
-; CHECK: LV: Vector loop of width 8 costs: 10.
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: store i8
-; CHECK: LV: Vector loop of width 16 costs: 25.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 2: 80 (Estimated cost per lane: 40.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5)
+; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 8: 86 (Estimated cost per lane: 10.8)
+; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 16: 404 (Estimated cost per lane: 25.2)
 ; CHECK: LV: Selecting VF: 8.
 define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
@@ -684,27 +836,45 @@ define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly %
 }
 
 ; CHECK-LABEL: four_bytes_split_op
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4
 ; CHECK: LV: Scalar loop costs: 20.
-; CHECK: LV: Vector loop of width 2 costs: 45.
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 12 for VF 4 For instruction: %13 = mul i8
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i8
-; CHECK: LV: Vector loop of width 4 costs: 21.
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %13 = mul i8
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: store i8
-; CHECK: LV: Vector loop of width 8 costs: 11.
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 4 for VF 16 For instruction: %13 = mul i8
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: store i8
-; CHECK: LV: Vector loop of width 16 costs: 25
-; CHECK: LV: Selecting VF: 8
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 2: 90 (Estimated cost per lane: 45.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 4: 84 (Estimated cost per lane: 21.0)
+; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 8: 92 (Estimated cost per lane: 11.5)
+; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 16: 410 (Estimated cost per lane: 25.6)
+; CHECK: LV: Selecting VF: 8.
 define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -749,24 +919,45 @@ define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly
 
 
 ; CHECK-LABEL: four_bytes_interleave_op
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4
 ; CHECK: LV: Scalar loop costs: 20.
-; CHECK: LV: Vector loop of width 2 costs: 40
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: store i8
-; CHECK: LV: Vector loop of width 4 costs: 15
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: store i8
-; CHECK: LV: Vector loop of width 8 costs: 10
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: store i8
-; CHECK: LV: Vector loop of width 16 costs: 25
-; CHECK: LV: Selecting VF: 8
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%12> = load ir<%11>
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%13>, ir<%14>
+; CHECK: Cost for VF 2: 80 (Estimated cost per lane: 40.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5)
+; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 8: 86 (Estimated cost per lane: 10.8)
+; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK: Cost for VF 16: 404 (Estimated cost per lane: 25.2)
+; CHECK: LV: Selecting VF: 8.
 define hidden void @four_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -811,22 +1002,84 @@ define hidden void @four_bytes_interleave_op(ptr noalias nocapture noundef write
 
 
 ; CHECK-LABEL: eight_bytes_same_op
-; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8
-; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8
-; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8
-; CHECK: LV: Found an estimated cost of 66 for VF 4 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 66 for VF 4 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %55
-; CHECK: LV: Vector loop of width 4 costs: 74
-; CHECK: LV: Found an estimated cost of 132 for VF 8 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 132 for VF 8 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %55
-; CHECK: LV: Vector loop of width 8 costs: 54
-; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %55
-; CHECK: LV: Vector loop of width 16 costs: 51
-; CHECK: LV: Selecting VF: 1
+; CHECK: LV: Scalar loop costs: 36.
+; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 2: 154 (Estimated cost per lane: 77.0)
+; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 4: 298 (Estimated cost per lane: 74.5)
+; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 8: 432 (Estimated cost per lane: 54.0)
+; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 16: 828 (Estimated cost per lane: 51.8)
+; CHECK: LV: Selecting VF: 1.
 define hidden void @eight_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -898,11 +1151,84 @@ define hidden void @eight_bytes_same_op(ptr noalias nocapture noundef writeonly
 }
 
 ; CHECK-LABEL: eight_bytes_split_op
-; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %55
-; CHECK: LV: Vector loop of width 16 costs: 50
-; CHECK: LV: Selecting VF: 1
+; CHECK: LV: Scalar loop costs: 36.
+; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 2: 114 (Estimated cost per lane: 57.0)
+; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 4: 210 (Estimated cost per lane: 52.5)
+; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 8: 408 (Estimated cost per lane: 51.0)
+; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 16: 804 (Estimated cost per lane: 50.2)
+; CHECK: LV: Selecting VF: 1.
 define hidden void @eight_bytes_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -974,11 +1300,84 @@ define hidden void @eight_bytes_split_op(ptr noalias nocapture noundef writeonly
 }
 
 ; CHECK-LABEL: eight_bytes_interleave_op
-; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: %12 = load i8
-; CHECK: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %55
-; CHECK: LV: Vector loop of width 16 costs: 50
-; CHECK: LV: Selecting VF: 1
+; CHECK: LV: Scalar loop costs: 36.
+; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 2: 114 (Estimated cost per lane: 57.0)
+; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 4: 210 (Estimated cost per lane: 52.5)
+; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 8: 408 (Estimated cost per lane: 51.0)
+; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%16> = load from index 1
+; CHECK-NEXT:   ir<%22> = load from index 2
+; CHECK-NEXT:   ir<%28> = load from index 3
+; CHECK-NEXT:   ir<%34> = load from index 4
+; CHECK-NEXT:   ir<%40> = load from index 5
+; CHECK-NEXT:   ir<%46> = load from index 6
+; CHECK-NEXT:   ir<%52> = load from index 7
+; CHECK: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%14>
+; CHECK-NEXT:   store ir<%13> to index 0
+; CHECK-NEXT:   store ir<%19> to index 1
+; CHECK-NEXT:   store ir<%25> to index 2
+; CHECK-NEXT:   store ir<%31> to index 3
+; CHECK-NEXT:   store ir<%37> to index 4
+; CHECK-NEXT:   store ir<%43> to index 5
+; CHECK-NEXT:   store ir<%49> to index 6
+; CHECK-NEXT:   store ir<%55> to index 7
+; CHECK: Cost for VF 16: 804 (Estimated cost per lane: 50.2)
+; CHECK: LV: Selecting VF: 1.
 define hidden void @eight_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
   br i1 %5, label %6, label %7
@@ -1051,14 +1450,32 @@ define hidden void @eight_bytes_interleave_op(ptr noalias nocapture noundef writ
 
 ; CHECK-LABEL: four_bytes_into_four_ints_same_op
 ; CHECK: LV: Scalar loop costs: 28.
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: %17 = load i32
-; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: store i32
-; CHECK: LV: Vector loop of width 2 costs: 44.
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 24 for VF 4 For instruction: %17 = load i32
-; CHECK: LV: Found an estimated cost of 24 for VF 4 For instruction: store i32
-; CHECK: LV: Vector loop of width 4 costs: 26.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%13> = load ir<%12>
+; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 4 at %17, ir<%16>
+; CHECK-NEXT:   ir<%17> = load from index 0
+; CHECK-NEXT:   ir<%27> = load from index 1
+; CHECK-NEXT:   ir<%37> = load from index 2
+; CHECK-NEXT:   ir<%47> = load from index 3
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%20> = load ir<%19>
+; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%16>
+; CHECK-NEXT:   store ir<%18> to index 0
+; CHECK-NEXT:   store ir<%28> to index 1
+; CHECK-NEXT:   store ir<%38> to index 2
+; CHECK-NEXT:   store ir<%48> to index 3
+; CHECK: Cost for VF 2: 88 (Estimated cost per lane: 44.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%20> = load from index 1
+; CHECK-NEXT:   ir<%30> = load from index 2
+; CHECK-NEXT:   ir<%40> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %13
+; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%16>
+; CHECK-NEXT:   store ir<%18> to index 0
+; CHECK-NEXT:   store ir<%28> to index 1
+; CHECK-NEXT:   store ir<%38> to index 2
+; CHECK-NEXT:   store ir<%48> to index 3
+; CHECK: Cost for VF 4: 104 (Estimated cost per lane: 26.0)
 ; CHECK: LV: Selecting VF: 4.
 define hidden void @four_bytes_into_four_ints_same_op(ptr noalias nocapture noundef %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
@@ -1119,18 +1536,28 @@ define hidden void @four_bytes_into_four_ints_same_op(ptr noalias nocapture noun
 }
 
 ; CHECK-LABEL: four_bytes_into_four_ints_vary_op
-; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 4
 ; CHECK: LV: Scalar loop costs: 21.
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %11 = zext i8
-; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: store i32
-; CHECK: LV: Vector loop of width 2 costs: 35.
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:  %11 = zext i8
-; CHECK: LV: Found an estimated cost of 24 for VF 4 For instruction: store i32
-; CHECK: LV: Vector loop of width 4 costs: 20.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load ir<%9>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%13> = load ir<%12>
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%18> = load ir<%17>
+; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%16>
+; CHECK-NEXT:   store ir<%15> to index 0
+; CHECK-NEXT:   store ir<%23> to index 1
+; CHECK-NEXT:   store ir<%31> to index 2
+; CHECK-NEXT:   store ir<%38> to index 3
+; CHECK: Cost for VF 2: 71 (Estimated cost per lane: 35.5)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, ir<%9>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%18> = load from index 1
+; CHECK-NEXT:   ir<%26> = load from index 2
+; CHECK-NEXT:   ir<%34> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %13
+; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%16>
+; CHECK-NEXT:   store ir<%15> to index 0
+; CHECK-NEXT:   store ir<%23> to index 1
+; CHECK-NEXT:   store ir<%31> to index 2
+; CHECK-NEXT:   store ir<%38> to index 3
+; CHECK: Cost for VF 4: 80 (Estimated cost per lane: 20.0)
 ; CHECK: LV: Selecting VF: 4.
 define hidden void @four_bytes_into_four_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
   %5 = icmp eq i32 %3, 0
@@ -1183,10 +1610,27 @@ define hidden void @four_bytes_into_four_ints_vary_op(ptr noalias nocapture noun
 
 ; CHECK-LABEL: scale_uv_row_down2
 ; CHECK: LV: Scalar loop costs: 10.
-; CHECK: LV: Vector loop of width 2 costs: 13.
-; CHECK: LV: Vector loop of width 4 costs: 8.
-; CHECK: LV: Vector loop of width 8 costs: 4.
-; CHECK: LV: Vector loop of width 16 costs: 5.
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %11, ir<%10>
+; CHECK-NEXT:   ir<%11> = load from index 0
+; CHECK-NEXT:   ir<%13> = load from index 1
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, vp<%next.gep>.1
+; CHECK-NEXT:   store ir<%11> to index 0
+; CHECK-NEXT:   store ir<%13> to index 1
+; CHECK: Cost for VF 4: 35 (Estimated cost per lane: 8.8)
+; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %11, ir<%10>
+; CHECK-NEXT:   ir<%11> = load from index 0
+; CHECK-NEXT:   ir<%13> = load from index 1
+; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, vp<%next.gep>.1
+; CHECK-NEXT:   store ir<%11> to index 0
+; CHECK-NEXT:   store ir<%13> to index 1
+; CHECK: Cost for VF 8: 39 (Estimated cost per lane: 4.9)
+; CHECK: Cost of 68 for VF 16: INTERLEAVE-GROUP with factor 4 at %11, ir<%10>
+; CHECK-NEXT:   ir<%11> = load from index 0
+; CHECK-NEXT:   ir<%13> = load from index 1
+; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, vp<%next.gep>.1
+; CHECK-NEXT:   store ir<%11> to index 0
+; CHECK-NEXT:   store ir<%13> to index 1
+; CHECK: Cost for VF 16: 80 (Estimated cost per lane: 5.0)
 ; CHECK: LV: Selecting VF: 8.
 define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) {
   %5 = icmp sgt i32 %3, 0
@@ -1215,19 +1659,41 @@ define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 no
 
 ; CHECK-LABEL: scale_uv_row_down2_box
 ; CHECK: LV: Scalar loop costs: 26.
-; CHECK: LV: Vector loop of width 2 costs: 39.
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %14 = load i8
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %20 = load i8
-; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 %48
-; CHECK: LV: Vector loop of width 4 costs: 18.
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %14 = load i8
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %20 = load i8
-; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: store i8 %48
-; CHECK: LV: Vector loop of width 8 costs: 11.
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %14 = load i8
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %20 = load i8
-; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: store i8 %48
-; CHECK: LV: Vector loop of width 16 costs: 20.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%14> = load vp<%next.gep> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%17> = load ir<%16> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%20> = load ir<%19> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%48>, ir<%49>
+; CHECK: Cost for VF 2: 78 (Estimated cost per lane: 39.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %14, vp<%next.gep>
+; CHECK-NEXT:   ir<%14> = load from index 0
+; CHECK-NEXT:   ir<%32> = load from index 1
+; CHECK-NEXT:   ir<%17> = load from index 2
+; CHECK-NEXT:   ir<%35> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %20
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, vp<%next.gep>.1
+; CHECK-NEXT:   store ir<%30> to index 0
+; CHECK-NEXT:   store ir<%48> to index 1
+; CHECK: Cost for VF 4: 73 (Estimated cost per lane: 18.2)
+; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %14, vp<%next.gep>
+; CHECK-NEXT:   ir<%14> = load from index 0
+; CHECK-NEXT:   ir<%32> = load from index 1
+; CHECK-NEXT:   ir<%17> = load from index 2
+; CHECK-NEXT:   ir<%35> = load from index 3
+; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %20
+; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, vp<%next.gep>.1
+; CHECK-NEXT:   store ir<%30> to index 0
+; CHECK-NEXT:   store ir<%48> to index 1
+; CHECK: Cost for VF 8: 89 (Estimated cost per lane: 11.1)
+; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %14, vp<%next.gep>
+; CHECK-NEXT:   ir<%14> = load from index 0
+; CHECK-NEXT:   ir<%32> = load from index 1
+; CHECK-NEXT:   ir<%17> = load from index 2
+; CHECK-NEXT:   ir<%35> = load from index 3
+; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %20
+; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, vp<%next.gep>.1
+; CHECK-NEXT:   store ir<%30> to index 0
+; CHECK-NEXT:   store ir<%48> to index 1
+; CHECK: Cost for VF 16: 322 (Estimated cost per lane: 20.1)
 ; CHECK: LV: Selecting VF: 8.
 define hidden void @scale_uv_row_down2_box(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) {
   %5 = icmp sgt i32 %3, 0
@@ -1293,18 +1759,38 @@ define hidden void @scale_uv_row_down2_box(ptr nocapture noundef readonly %0, i3
 
 ; CHECK-LABEL: scale_uv_row_down2_linear
 ; CHECK: LV: Scalar loop costs: 18.
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %13 = load i8
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8
-; CHECK: LV: Vector loop of width 2 costs: 25.
-; CHECK: LV: Found an estimated cost of 18 for VF 4 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8
-; CHECK: LV: Vector loop of width 4 costs: 11.
-; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %10 = load i8
-; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: store i8
-; CHECK: LV: Vector loop of width 8 costs: 6.
-; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %10 = load i8
-; CHECK: LV: Vector loop of width 16 costs: 10.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%10> = load vp<%next.gep> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%13> = load ir<%12> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%18>, vp<%next.gep>.1 (!alias.scope {{.*}}, !noalias {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%28>, ir<%29>
+; CHECK: Cost for VF 2: 50 (Estimated cost per lane: 25.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %10, vp<%next.gep>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%20> = load from index 1
+; CHECK-NEXT:   ir<%13> = load from index 2
+; CHECK-NEXT:   ir<%23> = load from index 3
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, vp<%next.gep>.1
+; CHECK-NEXT:   store ir<%18> to index 0
+; CHECK-NEXT:   store ir<%28> to index 1
+; CHECK: Cost for VF 4: 47 (Estimated cost per lane: 11.8)
+; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 at %10, vp<%next.gep>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%20> = load from index 1
+; CHECK-NEXT:   ir<%13> = load from index 2
+; CHECK-NEXT:   ir<%23> = load from index 3
+; CHECK: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, vp<%next.gep>.1
+; CHECK-NEXT:   store ir<%18> to index 0
+; CHECK-NEXT:   store ir<%28> to index 1
+; CHECK: Cost for VF 8: 55 (Estimated cost per lane: 6.9)
+; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at %10, vp<%next.gep>
+; CHECK-NEXT:   ir<%10> = load from index 0
+; CHECK-NEXT:   ir<%20> = load from index 1
+; CHECK-NEXT:   ir<%13> = load from index 2
+; CHECK-NEXT:   ir<%23> = load from index 3
+; CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, vp<%next.gep>.1
+; CHECK-NEXT:   store ir<%18> to index 0
+; CHECK-NEXT:   store ir<%28> to index 1
+; CHECK: Cost for VF 16: 174 (Estimated cost per lane: 10.9)
 ; CHECK: LV: Selecting VF: 8.
 define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) {
   %5 = icmp sgt i32 %3, 0
@@ -1347,16 +1833,21 @@ define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0,
 }
 
 ; CHECK-LABEL: two_floats_same_op
-; CHECK: LV: Scalar loop costs: 14
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
 ; CHECK: LV: Scalar loop costs: 14.
-; CHECK: LV: Vector loop of width 2 costs: 19.
-; CHECK: LV: Vector loop of width 4 costs: 15.
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%mul8> to index 1
+; CHECK: Cost for VF 2: 38 (Estimated cost per lane: 19.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%mul8> to index 1
+; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5)
 ; CHECK: LV: Selecting VF: 1.
 define hidden void @two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
@@ -1388,16 +1879,21 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: two_floats_vary_op
-; CHECK: LV: Scalar loop costs: 14
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
 ; CHECK: LV: Scalar loop costs: 14.
-; CHECK: LV: Vector loop of width 2 costs: 19.
-; CHECK: LV: Vector loop of width 4 costs: 15.
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%add> to index 0
+; CHECK-NEXT:   store ir<%sub> to index 1
+; CHECK: Cost for VF 2: 38 (Estimated cost per lane: 19.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%add> to index 0
+; CHECK-NEXT:   store ir<%sub> to index 1
+; CHECK: Cost for VF 4: 62 (Estimated cost per lane: 15.5)
 ; CHECK: LV: Selecting VF: 1.
 define hidden void @two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
@@ -1429,13 +1925,22 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: two_bytes_two_floats_same_op
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: LV: Scalar loop costs: 18
-; CHECK: LV: Vector loop of width 2 costs: 27
-; CHECK: LV: Vector loop of width 4 costs: 15
+; CHECK: LV: Scalar loop costs: 18.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%0> = load ir<%arrayidx> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%1> = load ir<%arrayidx1> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%2> = load ir<%y> (!alias.scope {{.*}})
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%mul11> to index 1
+; CHECK: Cost for VF 2: 54 (Estimated cost per lane: 27.0)
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%mul11> to index 1
+; CHECK: Cost for VF 4: 60 (Estimated cost per lane: 15.0)
 ; CHECK: LV: Selecting VF: 4.
 define hidden void @two_bytes_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
@@ -1471,13 +1976,22 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: two_bytes_two_floats_vary_op
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: LV: Scalar loop costs: 18
-; CHECK: LV: Vector loop of width 2 costs: 27
-; CHECK: LV: Vector loop of width 4 costs: 15
+; CHECK: LV: Scalar loop costs: 18.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%0> = load ir<%arrayidx> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%1> = load ir<%arrayidx1> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%2> = load ir<%y> (!alias.scope {{.*}})
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%add> to index 0
+; CHECK-NEXT:   store ir<%sub> to index 1
+; CHECK: Cost for VF 2: 54 (Estimated cost per lane: 27.0)
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%add> to index 0
+; CHECK-NEXT:   store ir<%sub> to index 1
+; CHECK: Cost for VF 4: 60 (Estimated cost per lane: 15.0)
 ; CHECK: LV: Selecting VF: 4.
 define hidden void @two_bytes_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
@@ -1513,13 +2027,21 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: two_floats_two_bytes_same_op
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: LV: Scalar loop costs: 16
-; CHECK: LV: Vector loop of width 2 costs: 26
-; CHECK: LV: Vector loop of width 4 costs: 16.
+; CHECK: LV: Scalar loop costs: 16.
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv>, ir<%arrayidx3> (!alias.scope {{.*}}, !noalias {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv9>, ir<%y11> (!alias.scope {{.*}}, !noalias {{.*}})
+; CHECK: Cost for VF 2: 52 (Estimated cost per lane: 26.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv9> to index 1
+; CHECK: Cost for VF 4: 67 (Estimated cost per lane: 16.8)
 ; CHECK: LV: Selecting VF: 1.
 define hidden void @two_floats_two_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
@@ -1553,13 +2075,21 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: two_floats_two_bytes_vary_op
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: LV: Scalar loop costs: 16
-; CHECK: LV: Vector loop of width 2 costs: 26
-; CHECK: LV: Vector loop of width 4 costs: 16.
+; CHECK: LV: Scalar loop costs: 16.
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv>, ir<%arrayidx3> (!alias.scope {{.*}}, !noalias {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv8>, ir<%y10> (!alias.scope {{.*}}, !noalias {{.*}})
+; CHECK: Cost for VF 2: 52 (Estimated cost per lane: 26.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv8> to index 1
+; CHECK: Cost for VF 4: 67 (Estimated cost per lane: 16.8)
 ; CHECK: LV: Selecting VF: 1.
 define hidden void @two_floats_two_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
@@ -1593,15 +2123,23 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: two_shorts_two_floats_same_op
-; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: LV: Scalar loop costs: 18
-; CHECK: LV: Vector loop of width 2 costs: 24
-; CHECK: LV: Vector loop of width 4 costs: 12
+; CHECK: LV: Scalar loop costs: 18.
+; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%mul11> to index 1
+; CHECK: Cost for VF 2: 48 (Estimated cost per lane: 24.0)
+; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%mul11> to index 1
+; CHECK: Cost for VF 4: 48 (Estimated cost per lane: 12.0)
 ; CHECK: LV: Selecting VF: 4.
 define hidden void @two_shorts_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
@@ -1637,15 +2175,23 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: two_shorts_two_floats_vary_op
-; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: LV: Scalar loop costs: 18
-; CHECK: LV: Vector loop of width 2 costs: 24
-; CHECK: LV: Vector loop of width 4 costs: 12
+; CHECK: LV: Scalar loop costs: 18.
+; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%add> to index 0
+; CHECK-NEXT:   store ir<%sub> to index 1
+; CHECK: Cost for VF 2: 48 (Estimated cost per lane: 24.0)
+; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%add> to index 0
+; CHECK-NEXT:   store ir<%sub> to index 1
+; CHECK: Cost for VF 4: 48 (Estimated cost per lane: 12.0)
 ; CHECK: LV: Selecting VF: 4.
 define hidden void @two_shorts_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
@@ -1681,16 +2227,24 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: two_floats_two_shorts_same_op
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: LV: Scalar loop costs: 16
-; CHECK: LV: Vector loop of width 2 costs: 23
-; CHECK: LV: Vector loop of width 4 costs: 14
-; CHECK: LV: Selecting VF: 4
+; CHECK: LV: Scalar loop costs: 16.
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv9> to index 1
+; CHECK: Cost for VF 2: 47 (Estimated cost per lane: 23.5)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv9> to index 1
+; CHECK: Cost for VF 4: 59 (Estimated cost per lane: 14.8)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @two_floats_two_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp22.not = icmp eq i32 %N, 0
@@ -1723,16 +2277,24 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: two_floats_two_shorts_vary_op
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2
-; CHECK: LV: Scalar loop costs: 16
-; CHECK: LV: Vector loop of width 2 costs: 23
-; CHECK: LV: Vector loop of width 4 costs: 14
-; CHECK: LV: Selecting VF: 4
+; CHECK: LV: Scalar loop costs: 16.
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv8> to index 1
+; CHECK: Cost for VF 2: 47 (Estimated cost per lane: 23.5)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %1
+; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv8> to index 1
+; CHECK: Cost for VF 4: 59 (Estimated cost per lane: 14.8)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @two_floats_two_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp21.not = icmp eq i32 %N, 0
@@ -1765,13 +2327,20 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: four_floats_same_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: LV: Scalar loop costs: 24
-; CHECK: LV: Vector loop of width 2 costs: 33
-; CHECK: LV: Vector loop of width 4 costs: 30
-; CHECK: LV: Selecting VF: 4
+; CHECK: LV: Scalar loop costs: 24.
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%mul8> to index 1
+; CHECK-NEXT:   store ir<%mul14> to index 2
+; CHECK-NEXT:   store ir<%mul20> to index 3
+; CHECK: Cost for VF 2: 66 (Estimated cost per lane: 33.0)
+; CHECK: Cost for VF 4: 12 (Estimated cost per lane: 3.0)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp45.not = icmp eq i32 %N, 0
@@ -1816,16 +2385,30 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: four_floats_vary_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: LV: Scalar loop costs: 24
-; CHECK: LV: Vector loop of width 2 costs: 33
-; CHECK: LV: Vector loop of width 4 costs: 30
-; CHECK: LV: Selecting VF: 1
+; CHECK: LV: Scalar loop costs: 24.
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%add> to index 0
+; CHECK-NEXT:   store ir<%sub> to index 1
+; CHECK-NEXT:   store ir<%mul> to index 2
+; CHECK-NEXT:   store ir<%div> to index 3
+; CHECK: Cost for VF 2: 66 (Estimated cost per lane: 33.0)
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%add> to index 0
+; CHECK-NEXT:   store ir<%sub> to index 1
+; CHECK-NEXT:   store ir<%mul> to index 2
+; CHECK-NEXT:   store ir<%div> to index 3
+; CHECK: Cost for VF 4: 120 (Estimated cost per lane: 30.0)
+; CHECK: LV: Selecting VF: 1.
 define hidden void @four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp42.not = icmp eq i32 %N, 0
@@ -1870,14 +2453,29 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: four_bytes_four_floats_same_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: LV: Scalar loop costs: 32
-; CHECK: LV: Vector loop of width 2 costs: 51
-; CHECK: LV: Vector loop of width 4 costs: 27
-; CHECK: LV: Selecting VF: 4
+; CHECK: LV: Scalar loop costs: 32.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%0> = load ir<%arrayidx> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%1> = load ir<%arrayidx1> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%2> = load ir<%y> (!alias.scope {{.*}})
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%mul11> to index 1
+; CHECK-NEXT:   store ir<%mul19> to index 2
+; CHECK-NEXT:   store ir<%mul27> to index 3
+; CHECK: Cost for VF 2: 102 (Estimated cost per lane: 51.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%mul11> to index 1
+; CHECK-NEXT:   store ir<%mul19> to index 2
+; CHECK-NEXT:   store ir<%mul27> to index 3
+; CHECK: Cost for VF 4: 108 (Estimated cost per lane: 27.0)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @four_bytes_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp52.not = icmp eq i32 %N, 0
@@ -1930,14 +2528,29 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: four_bytes_four_floats_vary_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: LV: Scalar loop costs: 32
-; CHECK: LV: Vector loop of width 2 costs: 51
-; CHECK: LV: Vector loop of width 4 costs: 27
-; CHECK: LV: Selecting VF: 4
+; CHECK: LV: Scalar loop costs: 32.
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%0> = load ir<%arrayidx> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%1> = load ir<%arrayidx1> (!alias.scope {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE ir<%2> = load ir<%y> (!alias.scope {{.*}})
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%add> to index 1
+; CHECK-NEXT:   store ir<%div> to index 2
+; CHECK-NEXT:   store ir<%sub> to index 3
+; CHECK: Cost for VF 2: 102 (Estimated cost per lane: 51.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%add> to index 1
+; CHECK-NEXT:   store ir<%div> to index 2
+; CHECK-NEXT:   store ir<%sub> to index 3
+; CHECK: Cost for VF 4: 108 (Estimated cost per lane: 27.0)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @four_bytes_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp49.not = icmp eq i32 %N, 0
@@ -1990,15 +2603,29 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: four_floats_four_bytes_same_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: LV: Scalar loop costs: 28
-; CHECK: LV: Vector loop of width 2 costs: 48
-; CHECK: LV: Vector loop of width 4 costs: 31
-; CHECK: LV: Selecting VF: 1
+; CHECK: LV: Scalar loop costs: 28.
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv>, ir<%arrayidx3> (!alias.scope {{.*}}, !noalias {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv9>, ir<%y11> (!alias.scope {{.*}}, !noalias {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv16>, ir<%z18> (!alias.scope {{.*}}, !noalias {{.*}})
+; CHECK: Cost for VF 2: 96 (Estimated cost per lane: 48.0)
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv9> to index 1
+; CHECK-NEXT:   store ir<%conv16> to index 2
+; CHECK-NEXT:   store ir<%conv23> to index 3
+; CHECK: Cost for VF 4: 126 (Estimated cost per lane: 31.5)
+; CHECK: LV: Selecting VF: 1.
 define hidden void @four_floats_four_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp48.not = icmp eq i32 %N, 0
@@ -2047,15 +2674,29 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: four_floats_four_bytes_vary_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: LV: Scalar loop costs: 28
-; CHECK: LV: Vector loop of width 2 costs: 48
-; CHECK: LV: Vector loop of width 4 costs: 31
-; CHECK: LV: Selecting VF: 1
+; CHECK: LV: Scalar loop costs: 28.
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv>, ir<%arrayidx3> (!alias.scope {{.*}}, !noalias {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv8>, ir<%y10> (!alias.scope {{.*}}, !noalias {{.*}})
+; CHECK: Cost of 6 for VF 2: REPLICATE store ir<%conv14>, ir<%z16> (!alias.scope {{.*}}, !noalias {{.*}})
+; CHECK: Cost for VF 2: 96 (Estimated cost per lane: 48.0)
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv8> to index 1
+; CHECK-NEXT:   store ir<%conv14> to index 2
+; CHECK-NEXT:   store ir<%conv20> to index 3
+; CHECK: Cost for VF 4: 126 (Estimated cost per lane: 31.5)
+; CHECK: LV: Selecting VF: 1.
 define hidden void @four_floats_four_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp45.not = icmp eq i32 %N, 0
@@ -2104,16 +2745,32 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: four_shorts_four_floats_same_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: LV: Scalar loop costs: 32
-; CHECK: LV: Vector loop of width 2 costs: 41
-; CHECK: LV: Vector loop of width 4 costs: 25
-; CHECK: LV: Selecting VF: 4
+; CHECK: LV: Scalar loop costs: 32.
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%mul11> to index 1
+; CHECK-NEXT:   store ir<%mul19> to index 2
+; CHECK-NEXT:   store ir<%mul27> to index 3
+; CHECK: Cost for VF 2: 82 (Estimated cost per lane: 41.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%mul11> to index 1
+; CHECK-NEXT:   store ir<%mul19> to index 2
+; CHECK-NEXT:   store ir<%mul27> to index 3
+; CHECK: Cost for VF 4: 100 (Estimated cost per lane: 25.0)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @four_shorts_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp52.not = icmp eq i32 %N, 0
@@ -2166,16 +2823,32 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: four_shorts_four_floats_vary_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: LV: Scalar loop costs: 32
-; CHECK: LV: Vector loop of width 2 costs: 41
-; CHECK: LV: Vector loop of width 4 costs: 25
-; CHECK: LV: Selecting VF: 4
+; CHECK: LV: Scalar loop costs: 32.
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%add> to index 1
+; CHECK-NEXT:   store ir<%div> to index 2
+; CHECK-NEXT:   store ir<%sub> to index 3
+; CHECK: Cost for VF 2: 82 (Estimated cost per lane: 41.0)
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx4>
+; CHECK-NEXT:   store ir<%mul> to index 0
+; CHECK-NEXT:   store ir<%add> to index 1
+; CHECK-NEXT:   store ir<%div> to index 2
+; CHECK-NEXT:   store ir<%sub> to index 3
+; CHECK: Cost for VF 4: 100 (Estimated cost per lane: 25.0)
+; CHECK: LV: Selecting VF: 4.
 define hidden void @four_shorts_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp49.not = icmp eq i32 %N, 0
@@ -2228,16 +2901,32 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: four_floats_four_shorts_same_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: LV: Scalar loop costs: 28
-; CHECK: LV: Vector loop of width 2 costs: 41
-; CHECK: LV: Vector loop of width 4 costs: 29
-; CHECK: LV: Selecting VF: 1
+; CHECK: LV: Scalar loop costs: 28.
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv9> to index 1
+; CHECK-NEXT:   store ir<%conv16> to index 2
+; CHECK-NEXT:   store ir<%conv23> to index 3
+; CHECK: Cost for VF 2: 82 (Estimated cost per lane: 41.0)
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv9> to index 1
+; CHECK-NEXT:   store ir<%conv16> to index 2
+; CHECK-NEXT:   store ir<%conv23> to index 3
+; CHECK: Cost for VF 4: 118 (Estimated cost per lane: 29.5)
+; CHECK: LV: Selecting VF: 1.
 define hidden void @four_floats_four_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp48.not = icmp eq i32 %N, 0
@@ -2286,16 +2975,32 @@ for.body:                                         ; preds = %entry, %for.body
 }
 
 ; CHECK-LABEL: four_floats_four_shorts_vary_op
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4
-; CHECK: LV: Scalar loop costs: 28
-; CHECK: LV: Vector loop of width 2 costs: 41
-; CHECK: LV: Vector loop of width 4 costs: 29
-; CHECK: LV: Selecting VF: 1
+; CHECK: LV: Scalar loop costs: 28.
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv8> to index 1
+; CHECK-NEXT:   store ir<%conv14> to index 2
+; CHECK-NEXT:   store ir<%conv20> to index 3
+; CHECK: Cost for VF 2: 82 (Estimated cost per lane: 41.0)
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %0, ir<%arrayidx>
+; CHECK-NEXT:   ir<%0> = load from index 0
+; CHECK-NEXT:   ir<%2> = load from index 1
+; CHECK-NEXT:   ir<%4> = load from index 2
+; CHECK-NEXT:   ir<%6> = load from index 3
+; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %1
+; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx3>
+; CHECK-NEXT:   store ir<%conv> to index 0
+; CHECK-NEXT:   store ir<%conv8> to index 1
+; CHECK-NEXT:   store ir<%conv14> to index 2
+; CHECK-NEXT:   store ir<%conv20> to index 3
+; CHECK: Cost for VF 4: 118 (Estimated cost per lane: 29.5)
+; CHECK: LV: Selecting VF: 1.
 define hidden void @four_floats_four_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
 entry:
   %cmp45.not = icmp eq i32 %N, 0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/handle-iptr-with-data-layout-to-not-assert.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/handle-iptr-with-data-layout-to-not-assert.ll
index d0d414a869636..eb87b420ff654 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/handle-iptr-with-data-layout-to-not-assert.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/handle-iptr-with-data-layout-to-not-assert.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9] for VF [0-9] For instruction:\s*store ptr %[0-9], ptr %__last" --filter "LV: Found an estimated cost of [0-9] for VF [0-9] For instruction:\s*store ptr %[0-9]" --version 5
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store ptr" --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at" --filter "^  ir<.* = load from index" --version 5
 ; REQUIRES: asserts
 ; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
 target triple = "x86_64-unknown-linux-gnu"
@@ -6,9 +6,12 @@ target triple = "x86_64-unknown-linux-gnu"
 define ptr @foo(ptr %__first, ptr %__last) #0 {
 ; CHECK-LABEL: 'foo'
 ; CHECK:  LV: Found an estimated cost of 1 for VF 1 For instruction: store ptr %0, ptr %__last, align 8
-; CHECK:  LV: Found an estimated cost of 2 for VF 2 For instruction: store ptr %0, ptr %__last, align 8
-; CHECK:  LV: Found an estimated cost of 3 for VF 4 For instruction: store ptr %0, ptr %__last, align 8
-; CHECK:  LV: Found an estimated cost of 3 for VF 8 For instruction: store ptr %0, ptr %__last, align 8
+; CHECK:  Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, vp<%next.gep>
+; CHECK:    ir<%0> = load from index 0
+; CHECK:  Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, vp<%next.gep>
+; CHECK:    ir<%0> = load from index 0
+; CHECK:  Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %0, vp<%next.gep>
+; CHECK:    ir<%0> = load from index 0
 ;
 entry:
   %cmp.not1 = icmp eq ptr %__first, %__last
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll
index 4fcbdcecc4790..5c43db13f12ca 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,39 +14,65 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
+; SSE2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; SSE2:    ir<%v0> = load from index 0
+; SSE2:    ir<%v1> = load from index 1
+; SSE2:  Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; SSE2:    ir<%v0> = load from index 0
+; SSE2:    ir<%v1> = load from index 1
+; SSE2:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX1:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX1:    ir<%v0> = load from index 0
+; AVX1:    ir<%v1> = load from index 1
+; AVX1:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX1:    ir<%v0> = load from index 0
+; AVX1:    ir<%v1> = load from index 1
+; AVX1:  Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 24 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX512:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 22 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 92 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll
index a6f190bd5eb96..a90795399544f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,39 +14,68 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 44 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 10 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 20 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 51 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 210 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX512:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 51 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 210 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll
index 80a9ad64b8626..9fb4c01abe813 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,35 +14,74 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 84 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 92 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX512:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 92 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll
index d8aadbe04b72f..0be2dc2c43504 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,136 +14,59 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 35 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 145 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX512:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 145 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll
index c51d8e0a89298..6b64df947f103 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,36 +14,88 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 37 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 76 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 37 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 76 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 210 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX512:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 210 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll
index d8dc847535154..9f044950e99f0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,179 +14,61 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 70 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX512:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll
index 068d18badbb98..9c0cbfd45d415 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,187 +14,73 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 15 for VF 8 For instruction: %v7 = load float, ptr %in7, align 4
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v7 = load float, ptr %in7, align 4
+; AVX2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:    ir<%v6> = load from index 6
+; AVX2:    ir<%v7> = load from index 7
+; AVX2:  Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX512:  LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX512:  LV: Found an estimated cost of 14 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX512:  LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX512:  LV: Found an estimated cost of 92 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4
+; AVX512:  Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
+; AVX512:  Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
+; AVX512:  Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
+; AVX512:  Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll
index b796772f6c580..b3cc8811542f5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,67 +14,55 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 12 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 24 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
+; SSE2:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; SSE2:    ir<%v0> = load from index 0
+; SSE2:    ir<%v1> = load from index 1
+; SSE2:  Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX1:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX1:    ir<%v0> = load from index 0
+; AVX1:    ir<%v1> = load from index 1
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 48 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll
index 868a9c2ab62cf..c847f70e41b15 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,86 +14,57 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 12 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 12 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 32 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll
index 5cbad3d97019c..d88c58571c773 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,99 +14,60 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll
index d5d6be704b757..f0d72a118d2f2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,126 +14,57 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512:  Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll
index dba4cb412e307..d43a3b3746783 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,149 +14,80 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2:  Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 51 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX512:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll
index a5a6a1e155d50..0d3f3b708ce08 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,165 +14,67 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX512:  Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 35 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll
index 63b93ca66e184..f739aadf7e138 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,171 +14,79 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
+; SSE2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX1:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 32 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX2:  Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX512:  LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX512:  Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
+; AVX512:  Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v6> = load ir<%in6>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v7> = load ir<%in7>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v7> = load ir<%in7>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v7> = load ir<%in7>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v6> = load ir<%in6>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v7> = load ir<%in7>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-half.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-half.ll
index ab767533f6e0d..796b34fd7c25b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-half.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-half.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF 32: INTERLEAVE-GROUP with factor [0-9]+ at %0," --version 5
 ; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize -mattr=avx512fp16 %s 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
@@ -9,9 +9,11 @@ target triple = "i386-unknown-linux-gnu"
 
 ; Function Attrs: norecurse nounwind
 define void @stride8(half %k, i32 %width_) {
+; CHECK-LABEL: 'stride8'
+; CHECK:  Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at %0, ir<%arrayidx>
+;
 entry:
 
-; CHECK: Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at %0, ir<%arrayidx>
 
   %cmp72 = icmp sgt i32 %width_, 0
   br i1 %cmp72, label %for.body.lr.ph, label %for.cond.cleanup
@@ -97,9 +99,11 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
 
 ; Function Attrs: norecurse nounwind
 define void @stride3(half %k, i32 %width_) {
+; CHECK-LABEL: 'stride3'
+; CHECK:  Cost of 18 for VF 32: INTERLEAVE-GROUP with factor 3 at %0, ir<%arrayidx>
+;
 entry:
 
-; CHECK: LV: Found an estimated cost of 18 for VF 32 For instruction: %0 = load half, ptr %arrayidx, align 4
 
   %cmp27 = icmp sgt i32 %width_, 0
   br i1 %cmp27, label %for.body.lr.ph, label %for.cond.cleanup
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll
index 19f2d23a22afc..d589c446f5330 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,49 +15,86 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
+; SSE2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; SSE2:    ir<%v0> = load from index 0
+; SSE2:    ir<%v1> = load from index 1
+; SSE2:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; SSE2:    ir<%v0> = load from index 0
+; SSE2:    ir<%v1> = load from index 1
+; SSE2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX1:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX1:    ir<%v0> = load from index 0
+; AVX1:    ir<%v1> = load from index 1
+; AVX1:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX1:    ir<%v0> = load from index 0
+; AVX1:    ir<%v1> = load from index 1
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 11 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 11 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 22 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 10 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 20 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 284 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX512DQ:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:  Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:  Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:  Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:  Cost of 284 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 7 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 34 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX512BW:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:  Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:  Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:  Cost of 7 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:  Cost of 34 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll
index f87927cffc3c9..222dac04dc019 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,49 +15,95 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
+; SSE2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 11 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 31 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 62 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 31 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 62 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 59 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 426 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX512DQ:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:  Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:  Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:  Cost of 30 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:  Cost of 59 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:  Cost of 426 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 9 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 18 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 81 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX512BW:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:  Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:  Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:  Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:  Cost of 18 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:  Cost of 81 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll
index f2e102e434d9d..5d822e5824e0f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,49 +15,112 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
+; SSE2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 79 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 158 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 79 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 158 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 77 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 154 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 568 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX512DQ:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:  Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:  Cost of 34 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:  Cost of 77 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:  Cost of 154 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:  Cost of 568 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 34 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 148 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX512BW:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:  Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:  Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:  Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:  Cost of 34 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:  Cost of 148 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll
index 0124a86a1548c..3cdc803c73098 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,193 +15,104 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
+; SSE2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 175 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 355 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 710 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX512DQ:  Cost of 25 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:  Cost of 45 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:  Cost of 85 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:  Cost of 175 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:  Cost of 355 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:  Cost of 710 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 55 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 235 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX512BW:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:  Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:  Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:  Cost of 28 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:  Cost of 55 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:  Cost of 235 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll
index 3417008ba5f9a..1bfe9f6d31769 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,49 +15,146 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
+; SSE2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 112 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 224 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2:  Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 42 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 224 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 109 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 218 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 852 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX512DQ:  Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:  Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:  Cost of 41 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:  Cost of 109 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:  Cost of 218 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:  Cost of 852 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 13 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 13 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 81 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 342 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX512BW:  Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:  Cost of 13 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:  Cost of 17 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:  Cost of 33 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:  Cost of 81 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:  Cost of 342 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll
index 392a677c95e3e..3b313a83c05ce 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,265 +15,128 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; SSE2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
+; SSE2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 121 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 245 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 497 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 994 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX512DQ:  Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:  Cost of 64 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:  Cost of 121 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:  Cost of 245 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:  Cost of 497 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:  Cost of 994 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 15 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 19 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 56 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 112 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 469 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX512BW:  Cost of 15 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:  Cost of 15 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:  Cost of 19 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:  Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:  Cost of 112 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:  Cost of 469 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll
index cbe9fc4b579b2..281639de7a6ab 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,269 +15,140 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; SSE2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
+; SSE2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2:  LV: Found an estimated cost of 66 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 136 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 568 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX512DQ:  Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
+; AVX512DQ:  Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
+; AVX512DQ:  Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
+; AVX512DQ:  Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
+; AVX512DQ:  Cost of 568 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
+; AVX512DQ:  Cost of 1136 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512BW:  LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512BW:  LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512BW:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512BW:  LV: Found an estimated cost of 64 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX512BW:  LV: Found an estimated cost of 148 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX512BW:  Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
+; AVX512BW:  Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
+; AVX512BW:  Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
+; AVX512BW:  Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
+; AVX512BW:  Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
+; AVX512BW:  Cost of 616 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll
index 2f01b40f1807a..ce92fccd3772e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,39 +14,50 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; SSE2:    ir<%v0> = load from index 0
+; SSE2:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; SSE2:    ir<%v0> = load from index 0
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX1:    ir<%v0> = load from index 0
+; AVX1:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX1:    ir<%v0> = load from index 0
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:  Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:  Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:  Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 2 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 13 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 50 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512:  Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 1 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 13 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 50 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll
index 0f7fb04348cda..7c11092830f9f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,39 +14,65 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; SSE2:    ir<%v0> = load from index 0
+; SSE2:    ir<%v1> = load from index 1
+; SSE2:  Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; SSE2:    ir<%v0> = load from index 0
+; SSE2:    ir<%v1> = load from index 1
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX1:    ir<%v0> = load from index 0
+; AVX1:    ir<%v1> = load from index 1
+; AVX1:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX1:    ir<%v0> = load from index 0
+; AVX1:    ir<%v1> = load from index 1
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 24 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 22 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 92 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll
index 2a5d8860e005f..90915502dba2f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,39 +14,57 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 16 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 34 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 9 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 36 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 144 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 36 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 144 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll
index e1fe312f9b22b..f036e897ad20c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,39 +14,45 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 11 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 23 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:  Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:  Cost of 11 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:  Cost of 23 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 2 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 3 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 21 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 78 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512:  Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 21 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 78 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll
index cee1dc84445b5..6d02b6504333d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,39 +14,68 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 44 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 10 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 20 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 51 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 210 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 51 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 210 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll
index 0f1e265a5c7de..bfbd85555db00 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,38 +14,64 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 67 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 32 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 67 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 17 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 71 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 71 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll
index 269bb2ee98b76..96e9a7a710fae 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,67 +14,54 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 44 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 50 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX512:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 50 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll
index 2a6c6407d9eff..0bdefbcb269bf 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,39 +14,45 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 16 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 33 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:  Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:  Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:  Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:  Cost of 33 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 2 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 29 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512:  Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 29 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll
index 5289cfb6838dd..8fb210b0b2dbf 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,35 +14,74 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 84 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 92 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 92 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll
index 0b2f091221319..5b2f2d522563a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,136 +14,59 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 35 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 145 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX512:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 145 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll
index cf9b4799b2a2f..e0674c038ecf4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,36 +14,88 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 37 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 76 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 37 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 76 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 210 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 210 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll
index 91bb334817c2d..754be0e80299f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,179 +14,61 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 70 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX512:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll
index 1c9d2edf0ef93..2ab06d6f82b9c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,187 +14,73 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v7 = load i32, ptr %in7, align 4
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2:  LV: Found an estimated cost of 68 for VF 32 For instruction: %v7 = load i32, ptr %in7, align 4
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:    ir<%v6> = load from index 6
+; AVX2:    ir<%v7> = load from index 7
+; AVX2:  Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX512:  LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX512:  LV: Found an estimated cost of 14 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX512:  LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX512:  LV: Found an estimated cost of 92 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4
+; AVX512:  Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
+; AVX512:  Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
+; AVX512:  Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
+; AVX512:  Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll
index 9a23a5923fd7e..52276bce225eb 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,67 +14,59 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 20 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 40 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
+; SSE2:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; SSE2:    ir<%v0> = load from index 0
+; SSE2:    ir<%v1> = load from index 1
+; SSE2:  Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX1:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX1:    ir<%v0> = load from index 0
+; AVX1:    ir<%v1> = load from index 1
+; AVX1:  Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 48 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll
index d8c64e3c17357..e842981174205 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,86 +14,57 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 20 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 20 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 32 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:  Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll
index 072220fb67e2a..328d0d6f8cef8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,99 +14,72 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX2:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll
index d1a51ae3f779d..e534038b2e795 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,116 +14,62 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX512:  Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll
index 16e2ad92a0565..8647841feeaab 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,137 +14,86 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX2:  Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 51 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX512:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll
index 73429492d47e8..972ebc51fdeec 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,165 +14,74 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX512:  Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 35 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v6> = load ir<%in6>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll
index 6656fc58548f4..e166fd5296d4b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,171 +14,79 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: %v7 = load i64, ptr %in7, align 8
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX512:  LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: %v7 = load i64, ptr %in7, align 8
+; AVX512:  Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
+; AVX512:  Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512:    ir<%v0> = load from index 0
+; AVX512:    ir<%v1> = load from index 1
+; AVX512:    ir<%v2> = load from index 2
+; AVX512:    ir<%v3> = load from index 3
+; AVX512:    ir<%v4> = load from index 4
+; AVX512:    ir<%v5> = load from index 5
+; AVX512:    ir<%v6> = load from index 6
+; AVX512:    ir<%v7> = load from index 7
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v6> = load ir<%in6>
+; AVX512:  Cost of 10 for VF 8: WIDEN ir<%v7> = load ir<%in7>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6>
+; AVX512:  Cost of 20 for VF 16: WIDEN ir<%v7> = load ir<%in7>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6>
+; AVX512:  Cost of 40 for VF 32: WIDEN ir<%v7> = load ir<%in7>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v6> = load ir<%in6>
+; AVX512:  Cost of 80 for VF 64: WIDEN ir<%v7> = load ir<%in7>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll
index b684bac7373a7..eafd91b4bf950 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,49 +15,78 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:  Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 7 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 270 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX512DQ:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:  Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:  Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:  Cost of 7 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:  Cost of 270 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 9 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 17 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 41 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX512BW:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:  Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:  Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:  Cost of 17 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:  Cost of 41 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll
index 9ba44e2564d13..061f27cd8caee 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,49 +15,95 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 17 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:  Cost of 17 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 14 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 405 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX512DQ:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:  Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:  Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:  Cost of 14 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:  Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:  Cost of 405 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 25 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX512BW:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:  Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:  Cost of 13 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:  Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:  Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:  Cost of 25 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll
index 7e79d7e3fb716..3e15690abe167 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,49 +15,112 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 26 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 13 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 26 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:  Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 25 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 58 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 540 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX512DQ:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:  Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:  Cost of 13 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:  Cost of 25 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:  Cost of 58 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:  Cost of 540 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 80 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 238 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX512BW:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:  Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:  Cost of 17 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:  Cost of 33 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:  Cost of 80 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:  Cost of 238 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll
index cce8887ad2447..2778841dbd3a1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,193 +15,104 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 335 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 675 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX512DQ:  Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:  Cost of 45 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:  Cost of 85 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:  Cost of 165 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:  Cost of 335 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:  Cost of 675 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 99 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 198 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 395 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX512BW:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:  Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:  Cost of 41 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:  Cost of 99 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:  Cost of 198 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:  Cost of 395 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll
index 259fb8d2c430f..281628dd5966d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,229 +15,146 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 46 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 88 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX2:  Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 46 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
+; AVX2:  Cost of 88 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX2:    ir<%v0> = load from index 0
+; AVX2:    ir<%v1> = load from index 1
+; AVX2:    ir<%v2> = load from index 2
+; AVX2:    ir<%v3> = load from index 3
+; AVX2:    ir<%v4> = load from index 4
+; AVX2:    ir<%v5> = load from index 5
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 45 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 85 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 810 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX512DQ:  Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:  Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:  Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:  Cost of 45 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:  Cost of 85 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:  Cost of 810 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 25 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 49 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 119 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 237 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 591 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX512BW:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:  Cost of 25 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:  Cost of 49 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:  Cost of 119 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:  Cost of 237 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:  Cost of 591 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll
index e8a2637b9f5b4..efc6704e4785a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,265 +15,128 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 233 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 469 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 945 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX512DQ:  Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:  Cost of 62 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:  Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:  Cost of 233 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:  Cost of 469 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:  Cost of 945 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 29 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 57 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 138 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 413 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 826 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX512BW:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:  Cost of 29 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:  Cost of 57 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:  Cost of 138 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:  Cost of 413 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:  Cost of 826 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll
index da19a9a9faed2..dc06af6263551 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -15,301 +15,140 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
-; SSE2:  LV: Found an estimated cost of 47 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
+; SSE2:  Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; SSE2:  Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX1:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1
+; AVX1:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX1:  Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2:  LV: Found an estimated cost of 65 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1
+; AVX2:  Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
+; AVX2:  Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
 ;
 ; AVX512DQ-LABEL: 'test'
 ; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 536 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1080 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v7 = load i8, ptr %in7, align 1
+; AVX512DQ:  Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
+; AVX512DQ:  Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
+; AVX512DQ:  Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
+; AVX512DQ:  Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
+; AVX512DQ:  Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
+; AVX512DQ:  Cost of 1080 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512DQ:    ir<%v0> = load from index 0
+; AVX512DQ:    ir<%v1> = load from index 1
+; AVX512DQ:    ir<%v2> = load from index 2
+; AVX512DQ:    ir<%v3> = load from index 3
+; AVX512DQ:    ir<%v4> = load from index 4
+; AVX512DQ:    ir<%v5> = load from index 5
+; AVX512DQ:    ir<%v6> = load from index 6
+; AVX512DQ:    ir<%v7> = load from index 7
 ;
 ; AVX512BW-LABEL: 'test'
 ; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512BW:  LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512BW:  LV: Found an estimated cost of 33 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512BW:  LV: Found an estimated cost of 65 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512BW:  LV: Found an estimated cost of 158 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512BW:  LV: Found an estimated cost of 472 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX512BW:  LV: Found an estimated cost of 1100 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX512BW:  LV: Found an estimated cost of 0 for VF 64 For instruction: %v7 = load i8, ptr %in7, align 1
+; AVX512BW:  Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
+; AVX512BW:  Cost of 33 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
+; AVX512BW:  Cost of 65 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
+; AVX512BW:  Cost of 158 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
+; AVX512BW:  Cost of 472 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
+; AVX512BW:  Cost of 1100 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
+; AVX512BW:    ir<%v0> = load from index 0
+; AVX512BW:    ir<%v1> = load from index 1
+; AVX512BW:    ir<%v2> = load from index 2
+; AVX512BW:    ir<%v3> = load from index 3
+; AVX512BW:    ir<%v4> = load from index 4
+; AVX512BW:    ir<%v5> = load from index 5
+; AVX512BW:    ir<%v6> = load from index 6
+; AVX512BW:    ir<%v7> = load from index 7
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll
index 85ab5b85c9958..d73dc5e0134d6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v1, ptr %out1"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
-; SSE2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4
-; SSE2:  LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v1, ptr %out1, align 4
-; SSE2:  LV: Found an estimated cost of 28 for VF 8 For instruction: store float %v1, ptr %out1, align 4
-; SSE2:  LV: Found an estimated cost of 56 for VF 16 For instruction: store float %v1, ptr %out1, align 4
+; SSE2:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 30 for VF 8 For instruction: store float %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 60 for VF 16 For instruction: store float %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 120 for VF 32 For instruction: store float %v1, ptr %out1, align 4
+; AVX1:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 30 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 60 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 120 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 4 For instruction: store float %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 6 for VF 8 For instruction: store float %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 12 for VF 16 For instruction: store float %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 24 for VF 32 For instruction: store float %v1, ptr %out1, align 4
+; AVX2:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 2 for VF 4 For instruction: store float %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 2 for VF 8 For instruction: store float %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 16 For instruction: store float %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 10 for VF 32 For instruction: store float %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 20 for VF 64 For instruction: store float %v1, ptr %out1, align 4
+; AVX512:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 10 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 20 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll
index 77abfc65a3e59..d372ab153784a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v2, ptr %out2"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,40 +13,92 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 2 For instruction: store float %v2, ptr %out2, align 4
-; SSE2:  LV: Found an estimated cost of 24 for VF 4 For instruction: store float %v2, ptr %out2, align 4
-; SSE2:  LV: Found an estimated cost of 48 for VF 8 For instruction: store float %v2, ptr %out2, align 4
-; SSE2:  LV: Found an estimated cost of 96 for VF 16 For instruction: store float %v2, ptr %out2, align 4
+; SSE2:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 22 for VF 4 For instruction: store float %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 45 for VF 8 For instruction: store float %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 90 for VF 16 For instruction: store float %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 180 for VF 32 For instruction: store float %v2, ptr %out2, align 4
+; AVX1:  Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 22 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 45 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 90 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 180 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 2 For instruction: store float %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: store float %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: store float %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: store float %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: store float %v2, ptr %out2, align 4
+; AVX2:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 28 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 4 for VF 2 For instruction: store float %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 4 for VF 4 For instruction: store float %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 8 for VF 8 For instruction: store float %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 12 for VF 16 For instruction: store float %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 24 for VF 32 For instruction: store float %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 48 for VF 64 For instruction: store float %v2, ptr %out2, align 4
+; AVX512:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 48 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll
index 699c2eb63cb31..f2f7107cc1cc7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v3, ptr %out3"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,40 +13,112 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
-; SSE2:  LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v3, ptr %out3, align 4
-; SSE2:  LV: Found an estimated cost of 28 for VF 4 For instruction: store float %v3, ptr %out3, align 4
-; SSE2:  LV: Found an estimated cost of 56 for VF 8 For instruction: store float %v3, ptr %out3, align 4
-; SSE2:  LV: Found an estimated cost of 112 for VF 16 For instruction: store float %v3, ptr %out3, align 4
+; SSE2:  Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 28 for VF 4 For instruction: store float %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 60 for VF 8 For instruction: store float %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 120 for VF 16 For instruction: store float %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 240 for VF 32 For instruction: store float %v3, ptr %out3, align 4
+; AVX1:  Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 60 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 120 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 240 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 6 for VF 2 For instruction: store float %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: store float %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 20 for VF 8 For instruction: store float %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 40 for VF 16 For instruction: store float %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 80 for VF 32 For instruction: store float %v3, ptr %out3, align 4
+; AVX2:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 80 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 2 For instruction: store float %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 4 For instruction: store float %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 11 for VF 8 For instruction: store float %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 22 for VF 16 For instruction: store float %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 44 for VF 32 For instruction: store float %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 88 for VF 64 For instruction: store float %v3, ptr %out3, align 4
+; AVX512:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 88 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll
index 86758b5a24fe9..5e29f47acaf35 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v4, ptr %out4"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,37 +13,132 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
-; SSE2:  LV: Found an estimated cost of 20 for VF 2 For instruction: store float %v4, ptr %out4, align 4
-; SSE2:  LV: Found an estimated cost of 44 for VF 4 For instruction: store float %v4, ptr %out4, align 4
-; SSE2:  LV: Found an estimated cost of 88 for VF 8 For instruction: store float %v4, ptr %out4, align 4
+; SSE2:  Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
-; AVX1:  LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4
-; AVX1:  LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4
-; AVX1:  LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4
-; AVX1:  LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4
+; AVX1:  Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 75 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 150 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 300 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
-; AVX2:  LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4
-; AVX2:  LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4
-; AVX2:  LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4
-; AVX2:  LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4
+; AVX2:  Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 75 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 150 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 300 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 7 for VF 2 For instruction: store float %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 21 for VF 8 For instruction: store float %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 35 for VF 16 For instruction: store float %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 70 for VF 32 For instruction: store float %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 140 for VF 64 For instruction: store float %v4, ptr %out4, align 4
+; AVX512:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 70 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 140 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll
index 53c8f59491e76..c003c1314575e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v5, ptr %out5"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,37 +13,152 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
-; SSE2:  LV: Found an estimated cost of 21 for VF 2 For instruction: store float %v5, ptr %out5, align 4
-; SSE2:  LV: Found an estimated cost of 48 for VF 4 For instruction: store float %v5, ptr %out5, align 4
-; SSE2:  LV: Found an estimated cost of 96 for VF 8 For instruction: store float %v5, ptr %out5, align 4
+; SSE2:  Cost of 21 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 48 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 96 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
-; AVX1:  LV: Found an estimated cost of 19 for VF 2 For instruction: store float %v5, ptr %out5, align 4
-; AVX1:  LV: Found an estimated cost of 42 for VF 4 For instruction: store float %v5, ptr %out5, align 4
-; AVX1:  LV: Found an estimated cost of 90 for VF 8 For instruction: store float %v5, ptr %out5, align 4
-; AVX1:  LV: Found an estimated cost of 180 for VF 16 For instruction: store float %v5, ptr %out5, align 4
+; AVX1:  Cost of 19 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 42 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 90 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 180 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 360 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
-; AVX2:  LV: Found an estimated cost of 11 for VF 2 For instruction: store float %v5, ptr %out5, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 4 For instruction: store float %v5, ptr %out5, align 4
-; AVX2:  LV: Found an estimated cost of 39 for VF 8 For instruction: store float %v5, ptr %out5, align 4
-; AVX2:  LV: Found an estimated cost of 78 for VF 16 For instruction: store float %v5, ptr %out5, align 4
+; AVX2:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 15 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 39 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 78 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 360 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 8 for VF 2 For instruction: store float %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 17 for VF 4 For instruction: store float %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 25 for VF 8 For instruction: store float %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 51 for VF 16 For instruction: store float %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 102 for VF 32 For instruction: store float %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 204 for VF 64 For instruction: store float %v5, ptr %out5, align 4
+; AVX512:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 25 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 102 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 204 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll
index 244bceb69f97d..d48a3409f9b3e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v6, ptr %out6"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,36 +13,172 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
-; SSE2:  LV: Found an estimated cost of 23 for VF 2 For instruction: store float %v6, ptr %out6, align 4
-; SSE2:  LV: Found an estimated cost of 52 for VF 4 For instruction: store float %v6, ptr %out6, align 4
-; SSE2:  LV: Found an estimated cost of 104 for VF 8 For instruction: store float %v6, ptr %out6, align 4
+; SSE2:  Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 104 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 208 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
-; AVX1:  LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4
-; AVX1:  LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4
-; AVX1:  LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4
-; AVX1:  LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4
+; AVX1:  Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 50 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 105 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 210 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 420 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
-; AVX2:  LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4
-; AVX2:  LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4
-; AVX2:  LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4
-; AVX2:  LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4
+; AVX2:  Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 50 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 105 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 210 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 420 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 10 for VF 2 For instruction: store float %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 20 for VF 4 For instruction: store float %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 40 for VF 8 For instruction: store float %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 70 for VF 16 For instruction: store float %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 140 for VF 32 For instruction: store float %v6, ptr %out6, align 4
+; AVX512:  Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll
index c1fe64e324378..117966d4dbc39 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v7, ptr %out7"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,36 +13,192 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
-; SSE2:  LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4
-; SSE2:  LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4
-; SSE2:  LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v7, ptr %out7, align 4
+; SSE2:  Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 112 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 224 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
-; AVX1:  LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4
-; AVX1:  LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4
-; AVX1:  LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4
-; AVX1:  LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4
+; AVX1:  Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 240 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 480 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
-; AVX2:  LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4
-; AVX2:  LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4
-; AVX2:  LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4
-; AVX2:  LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4
+; AVX2:  Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 240 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 480 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 11 for VF 2 For instruction: store float %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 23 for VF 4 For instruction: store float %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 46 for VF 8 For instruction: store float %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 92 for VF 16 For instruction: store float %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 184 for VF 32 For instruction: store float %v7, ptr %out7, align 4
+; AVX512:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 23 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 46 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 184 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 368 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll
index dd6094e4a7d5c..c0e32fb5dee91 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v1, ptr %out1"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 6 for VF 2 For instruction: store double %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 12 for VF 4 For instruction: store double %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 24 for VF 8 For instruction: store double %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 48 for VF 16 For instruction: store double %v1, ptr %out1, align 8
+; SSE2:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 48 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 6 for VF 2 For instruction: store double %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 14 for VF 4 For instruction: store double %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 28 for VF 8 For instruction: store double %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 56 for VF 16 For instruction: store double %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 112 for VF 32 For instruction: store double %v1, ptr %out1, align 8
+; AVX1:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 112 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: store double %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 6 for VF 4 For instruction: store double %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 12 for VF 8 For instruction: store double %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 24 for VF 16 For instruction: store double %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 48 for VF 32 For instruction: store double %v1, ptr %out1, align 8
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 2 for VF 2 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 2 for VF 4 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 5 for VF 8 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 16 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 32 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 64 For instruction: store double %v1, ptr %out1, align 8
+; AVX512:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 40 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll
index 3f4216bb3a1ef..2c24bd2095c89 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v2, ptr %out2"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,37 +13,92 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 2 For instruction: store double %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 20 for VF 4 For instruction: store double %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 40 for VF 8 For instruction: store double %v2, ptr %out2, align 8
+; SSE2:  Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 80 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 11 for VF 2 For instruction: store double %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 24 for VF 4 For instruction: store double %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 48 for VF 8 For instruction: store double %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 96 for VF 16 For instruction: store double %v2, ptr %out2, align 8
+; AVX1:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 192 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 6 for VF 2 For instruction: store double %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: store double %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: store double %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: store double %v2, ptr %out2, align 8
+; AVX2:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 36 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 192 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 4 for VF 2 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 8 for VF 4 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 12 for VF 8 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 24 for VF 16 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 48 for VF 32 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 96 for VF 64 For instruction: store double %v2, ptr %out2, align 8
+; AVX512:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 96 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll
index 44f9a14424cda..8b7ec565f1dde 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v3, ptr %out3"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,36 +13,112 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 12 for VF 2 For instruction: store double %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 24 for VF 4 For instruction: store double %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 48 for VF 8 For instruction: store double %v3, ptr %out3, align 8
+; SSE2:  Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 12 for VF 2 For instruction: store double %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 28 for VF 4 For instruction: store double %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 56 for VF 8 For instruction: store double %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 112 for VF 16 For instruction: store double %v3, ptr %out3, align 8
+; AVX1:  Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 224 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 8 for VF 2 For instruction: store double %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 12 for VF 4 For instruction: store double %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 8 For instruction: store double %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 16 For instruction: store double %v3, ptr %out3, align 8
+; AVX2:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 224 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 5 for VF 2 For instruction: store double %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 11 for VF 4 For instruction: store double %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 22 for VF 8 For instruction: store double %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 44 for VF 16 For instruction: store double %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 88 for VF 32 For instruction: store double %v3, ptr %out3, align 8
+; AVX512:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 44 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 88 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 176 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll
index 5946c4d7df295..14216fca2fc90 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v4, ptr %out4"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,33 +13,132 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 18 for VF 2 For instruction: store double %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 36 for VF 4 For instruction: store double %v4, ptr %out4, align 8
+; SSE2:  Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 72 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 144 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 88 for VF 8 For instruction: store double %v4, ptr %out4, align 8
+; AVX1:  Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 88 for VF 8 For instruction: store double %v4, ptr %out4, align 8
+; AVX2:  Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 14 for VF 2 For instruction: store double %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 21 for VF 4 For instruction: store double %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 35 for VF 8 For instruction: store double %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 70 for VF 16 For instruction: store double %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 140 for VF 32 For instruction: store double %v4, ptr %out4, align 8
+; AVX512:  Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll
index e9c97283fab18..f15e3ffc88103 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v5, ptr %out5"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,33 +13,152 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 40 for VF 4 For instruction: store double %v5, ptr %out5, align 8
+; SSE2:  Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 80 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 160 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 21 for VF 2 For instruction: store double %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 48 for VF 4 For instruction: store double %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 96 for VF 8 For instruction: store double %v5, ptr %out5, align 8
+; AVX1:  Cost of 21 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 48 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 96 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 384 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 11 for VF 2 For instruction: store double %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 21 for VF 4 For instruction: store double %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 42 for VF 8 For instruction: store double %v5, ptr %out5, align 8
+; AVX2:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 42 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 384 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 17 for VF 2 For instruction: store double %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 25 for VF 4 For instruction: store double %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 51 for VF 8 For instruction: store double %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 102 for VF 16 For instruction: store double %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 204 for VF 32 For instruction: store double %v5, ptr %out5, align 8
+; AVX512:  Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 25 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 102 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 204 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 408 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll
index 555bbe8e44269..763c95910162a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v6, ptr %out6"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,33 +13,172 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 22 for VF 2 For instruction: store double %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v6, ptr %out6, align 8
+; SSE2:  Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v0> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 52 for VF 4 For instruction: store double %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 104 for VF 8 For instruction: store double %v6, ptr %out6, align 8
+; AVX1:  Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 104 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 208 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 416 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v0> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 52 for VF 4 For instruction: store double %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 104 for VF 8 For instruction: store double %v6, ptr %out6, align 8
+; AVX2:  Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 104 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 208 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 416 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v0> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 4 For instruction: store double %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 70 for VF 8 For instruction: store double %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 140 for VF 16 For instruction: store double %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 280 for VF 32 For instruction: store double %v6, ptr %out6, align 8
+; AVX512:  Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 140 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 280 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 560 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v0> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll
index a11d86ce14ef7..90c4333ee7597 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v., ptr %out."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store double %v\., ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (WIDEN store|REPLICATE store ir<%v\.>)"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,164 +13,41 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 48 for VF 4 For instruction: store double %v7, ptr %out7, align 8
-;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 56 for VF 4 For instruction: store double %v7, ptr %out7, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v0, ptr %out0, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 112 for VF 8 For instruction: store double %v7, ptr %out7, align 8
-;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 4 For instruction: store double %v7, ptr %out7, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v0, ptr %out0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 112 for VF 8 For instruction: store double %v7, ptr %out7, align 8
-;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v7, ptr %out7, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 46 for VF 4 For instruction: store double %v7, ptr %out7, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v0, ptr %out0, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v7, ptr %out7, align 8
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out0>, ir<%v0>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out1>, ir<%v1>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out2>, ir<%v2>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out3>, ir<%v3>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out4>, ir<%v4>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out5>, ir<%v5>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out6>, ir<%v6>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out7>, ir<%v7>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out0>, ir<%v0>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out1>, ir<%v1>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out2>, ir<%v2>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out3>, ir<%v3>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out4>, ir<%v4>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out5>, ir<%v5>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out6>, ir<%v6>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out7>, ir<%v7>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out0>, ir<%v0>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out1>, ir<%v1>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out2>, ir<%v2>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out3>, ir<%v3>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out4>, ir<%v4>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out5>, ir<%v5>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out6>, ir<%v6>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out7>, ir<%v7>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out0>, ir<%v0>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out1>, ir<%v1>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out2>, ir<%v2>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out3>, ir<%v3>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out4>, ir<%v4>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out5>, ir<%v5>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out6>, ir<%v6>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out7>, ir<%v7>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll
index 5a48776cdad2f..ff1e2e9d786e1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v1, ptr %out1"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,92 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
-; SSE2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2
-; SSE2:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2
-; SSE2:  LV: Found an estimated cost of 34 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
-; SSE2:  LV: Found an estimated cost of 68 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
+; SSE2:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 34 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 68 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX1:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX1:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX1:  LV: Found an estimated cost of 34 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX1:  LV: Found an estimated cost of 70 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX1:  LV: Found an estimated cost of 140 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2
+; AVX1:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 34 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX2:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX2:  LV: Found an estimated cost of 6 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX2:  LV: Found an estimated cost of 12 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2
+; AVX2:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 12 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 4 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 5 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 10 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512DQ:  LV: Found an estimated cost of 284 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2
+; AVX512DQ:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:  Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:  Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:  Cost of 10 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:  Cost of 284 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512BW:  LV: Found an estimated cost of 3 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512BW:  LV: Found an estimated cost of 3 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512BW:  LV: Found an estimated cost of 3 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512BW:  LV: Found an estimated cost of 3 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512BW:  LV: Found an estimated cost of 7 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2
-; AVX512BW:  LV: Found an estimated cost of 14 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2
+; AVX512BW:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:  Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:  Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:  Cost of 7 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:  Cost of 14 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll
index 2cfb488d5007f..9d7bab77ae8f2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v2, ptr %out2"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,118 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
-; SSE2:  LV: Found an estimated cost of 16 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2
-; SSE2:  LV: Found an estimated cost of 26 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
-; SSE2:  LV: Found an estimated cost of 51 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
-; SSE2:  LV: Found an estimated cost of 102 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
+; SSE2:  Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 26 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 102 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX1:  LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX1:  LV: Found an estimated cost of 29 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX1:  LV: Found an estimated cost of 52 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX1:  LV: Found an estimated cost of 105 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX1:  LV: Found an estimated cost of 210 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2
+; AVX1:  Cost of 15 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 29 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 52 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 105 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 210 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX2:  LV: Found an estimated cost of 7 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2
+; AVX2:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 30 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 7 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 9 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 15 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 29 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 57 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512DQ:  LV: Found an estimated cost of 426 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2
+; AVX512DQ:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:  Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:  Cost of 15 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:  Cost of 29 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:  Cost of 57 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:  Cost of 426 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512BW:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512BW:  LV: Found an estimated cost of 6 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512BW:  LV: Found an estimated cost of 6 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512BW:  LV: Found an estimated cost of 12 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512BW:  LV: Found an estimated cost of 18 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2
-; AVX512BW:  LV: Found an estimated cost of 36 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2
+; AVX512BW:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:  Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:  Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:  Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:  Cost of 18 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:  Cost of 36 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll
index 2e4594fac2361..323f0ea8b3007 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v3, ptr %out3"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,144 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
-; SSE2:  LV: Found an estimated cost of 17 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2
-; SSE2:  LV: Found an estimated cost of 34 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
-; SSE2:  LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
-; SSE2:  LV: Found an estimated cost of 136 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
+; SSE2:  Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 34 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 136 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX1:  LV: Found an estimated cost of 17 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX1:  LV: Found an estimated cost of 34 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX1:  LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX1:  LV: Found an estimated cost of 140 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX1:  LV: Found an estimated cost of 280 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2
+; AVX1:  Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 34 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 140 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 280 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX2:  LV: Found an estimated cost of 12 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX2:  LV: Found an estimated cost of 72 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 36 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 72 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 3 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 7 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 11 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 34 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 68 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512DQ:  LV: Found an estimated cost of 568 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2
+; AVX512DQ:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:  Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:  Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:  Cost of 34 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:  Cost of 68 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:  Cost of 568 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512BW:  LV: Found an estimated cost of 8 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512BW:  LV: Found an estimated cost of 8 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512BW:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512BW:  LV: Found an estimated cost of 17 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512BW:  LV: Found an estimated cost of 34 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2
-; AVX512BW:  LV: Found an estimated cost of 68 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2
+; AVX512BW:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:  Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:  Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:  Cost of 34 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:  Cost of 68 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll
index f536f4438649d..1ca528a9ab95f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v4, ptr %out4"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,170 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
-; SSE2:  LV: Found an estimated cost of 22 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
-; SSE2:  LV: Found an estimated cost of 43 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
-; SSE2:  LV: Found an estimated cost of 85 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
-; SSE2:  LV: Found an estimated cost of 170 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
+; SSE2:  Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 43 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 85 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 170 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX1:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX1:  LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX1:  LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX1:  LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX1:  LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
+; AVX1:  Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 86 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 175 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 350 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX2:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX2:  LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX2:  LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX2:  LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX2:  LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
+; AVX2:  Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 86 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 175 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 350 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 47 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 176 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 355 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512DQ:  LV: Found an estimated cost of 710 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2
+; AVX512DQ:  Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:  Cost of 47 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:  Cost of 86 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:  Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:  Cost of 355 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:  Cost of 710 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512BW:  LV: Found an estimated cost of 11 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512BW:  LV: Found an estimated cost of 11 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512BW:  LV: Found an estimated cost of 22 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512BW:  LV: Found an estimated cost of 33 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512BW:  LV: Found an estimated cost of 55 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
-; AVX512BW:  LV: Found an estimated cost of 110 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2
+; AVX512BW:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:  Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:  Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:  Cost of 33 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:  Cost of 55 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:  Cost of 110 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll
index 1b7522d01ae2e..04feb5c3e0252 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v5, ptr %out5"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,196 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
-; SSE2:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
-; SSE2:  LV: Found an estimated cost of 51 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
-; SSE2:  LV: Found an estimated cost of 102 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
-; SSE2:  LV: Found an estimated cost of 204 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
+; SSE2:  Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 51 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 102 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 204 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX1:  LV: Found an estimated cost of 29 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX1:  LV: Found an estimated cost of 52 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX1:  LV: Found an estimated cost of 102 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX1:  LV: Found an estimated cost of 210 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX1:  LV: Found an estimated cost of 420 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2
+; AVX1:  Cost of 29 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 102 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 210 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 420 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX2:  LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX2:  LV: Found an estimated cost of 17 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX2:  LV: Found an estimated cost of 24 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX2:  LV: Found an estimated cost of 64 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX2:  LV: Found an estimated cost of 102 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2
+; AVX2:  Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 102 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 18 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 23 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 61 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 96 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512DQ:  LV: Found an estimated cost of 852 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2
+; AVX512DQ:  Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:  Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:  Cost of 23 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:  Cost of 61 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:  Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:  Cost of 852 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512BW:  LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512BW:  LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512BW:  LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512BW:  LV: Found an estimated cost of 40 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512BW:  LV: Found an estimated cost of 81 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2
-; AVX512BW:  LV: Found an estimated cost of 162 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2
+; AVX512BW:  Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:  Cost of 13 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:  Cost of 27 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:  Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:  Cost of 81 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:  Cost of 162 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll
index f6ed77887c802..e6dd98c0a5326 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v6, ptr %out6"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,222 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
-; SSE2:  LV: Found an estimated cost of 33 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
-; SSE2:  LV: Found an estimated cost of 60 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
-; SSE2:  LV: Found an estimated cost of 119 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
-; SSE2:  LV: Found an estimated cost of 238 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
+; SSE2:  Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 60 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 119 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 238 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX1:  LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX1:  LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX1:  LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX1:  LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX1:  LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
+; AVX1:  Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 245 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 490 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX2:  LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX2:  LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX2:  LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX2:  LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX2:  LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
+; AVX2:  Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 245 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 490 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 122 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 246 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 497 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512DQ:  LV: Found an estimated cost of 994 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2
+; AVX512DQ:  Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:  Cost of 65 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:  Cost of 122 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:  Cost of 246 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:  Cost of 497 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:  Cost of 994 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512BW:  LV: Found an estimated cost of 16 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512BW:  LV: Found an estimated cost of 16 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512BW:  LV: Found an estimated cost of 32 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512BW:  LV: Found an estimated cost of 64 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512BW:  LV: Found an estimated cost of 112 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
-; AVX512BW:  LV: Found an estimated cost of 224 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2
+; AVX512BW:  Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:  Cost of 16 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:  Cost of 32 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:  Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:  Cost of 112 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:  Cost of 224 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll
index cc82d48fadb2c..e1e267b2595c8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v7, ptr %out7"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,248 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
-; SSE2:  LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
-; SSE2:  LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
-; SSE2:  LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
-; SSE2:  LV: Found an estimated cost of 272 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
+; SSE2:  Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 272 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX1:  LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX1:  LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX1:  LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX1:  LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX1:  LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
+; AVX1:  Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 560 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX2:  LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX2:  LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX2:  LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX2:  LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX2:  LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
+; AVX2:  Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 560 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 568 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512DQ:  LV: Found an estimated cost of 1136 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2
+; AVX512DQ:  Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
+; AVX512DQ:  Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
+; AVX512DQ:  Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
+; AVX512DQ:  Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
+; AVX512DQ:  Cost of 568 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
+; AVX512DQ:  Cost of 1136 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512BW:  LV: Found an estimated cost of 18 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512BW:  LV: Found an estimated cost of 18 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512BW:  LV: Found an estimated cost of 37 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512BW:  LV: Found an estimated cost of 74 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512BW:  LV: Found an estimated cost of 148 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
-; AVX512BW:  LV: Found an estimated cost of 296 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2
+; AVX512BW:  Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
+; AVX512BW:  Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
+; AVX512BW:  Cost of 37 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
+; AVX512BW:  Cost of 74 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
+; AVX512BW:  Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
+; AVX512BW:  Cost of 296 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll
index 0a33a96bb6ba1..95523a3dce3fc 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v1, ptr %out1"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
-; SSE2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4
-; SSE2:  LV: Found an estimated cost of 30 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4
-; SSE2:  LV: Found an estimated cost of 60 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4
-; SSE2:  LV: Found an estimated cost of 120 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4
+; SSE2:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 30 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 60 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 120 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 18 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 38 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 76 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX1:  LV: Found an estimated cost of 152 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4
+; AVX1:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 38 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 76 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 152 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 3 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 6 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 12 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX2:  LV: Found an estimated cost of 24 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4
+; AVX2:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 2 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 10 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4
-; AVX512:  LV: Found an estimated cost of 20 for VF 64 For instruction: store i32 %v1, ptr %out1, align 4
+; AVX512:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 10 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 20 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll
index ad8eca60b43bd..07fe59c0f36c3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v2, ptr %out2"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,40 +13,92 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
-; SSE2:  LV: Found an estimated cost of 23 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4
-; SSE2:  LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4
-; SSE2:  LV: Found an estimated cost of 96 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4
-; SSE2:  LV: Found an estimated cost of 192 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4
+; SSE2:  Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 48 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 96 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 28 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 57 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 114 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX1:  LV: Found an estimated cost of 228 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4
+; AVX1:  Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 57 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 114 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 228 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 14 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 28 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX2:  LV: Found an estimated cost of 60 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4
+; AVX2:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 28 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 4 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 12 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 24 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4
-; AVX512:  LV: Found an estimated cost of 48 for VF 64 For instruction: store i32 %v2, ptr %out2, align 4
+; AVX512:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 48 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll
index 6c86f56a2da3c..0949b11043e22 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v3, ptr %out3"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,40 +13,112 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
-; SSE2:  LV: Found an estimated cost of 28 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4
-; SSE2:  LV: Found an estimated cost of 60 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4
-; SSE2:  LV: Found an estimated cost of 120 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4
-; SSE2:  LV: Found an estimated cost of 240 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4
+; SSE2:  Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 60 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 240 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 18 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 36 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 76 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 152 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX1:  LV: Found an estimated cost of 304 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4
+; AVX1:  Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 76 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 152 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 304 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 20 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 40 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX2:  LV: Found an estimated cost of 80 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4
+; AVX2:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 80 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 11 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 22 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 44 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4
-; AVX512:  LV: Found an estimated cost of 88 for VF 64 For instruction: store i32 %v3, ptr %out3, align 4
+; AVX512:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 88 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll
index f4fbbec3a46f5..f2a50db468e5b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v4, ptr %out4"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,37 +13,132 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
-; SSE2:  LV: Found an estimated cost of 40 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
-; SSE2:  LV: Found an estimated cost of 84 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
-; SSE2:  LV: Found an estimated cost of 168 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
+; SSE2:  Cost of 40 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 84 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 168 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 336 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX1:  LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX1:  LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX1:  LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX1:  LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4
+; AVX1:  Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 95 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 190 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 380 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX2:  LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX2:  LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX2:  LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX2:  LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4
+; AVX2:  Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 95 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 190 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 380 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 7 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 14 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 21 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 35 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 70 for VF 32 For instruction: store i32 %v4, ptr %out4, align 4
-; AVX512:  LV: Found an estimated cost of 140 for VF 64 For instruction: store i32 %v4, ptr %out4, align 4
+; AVX512:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 70 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 140 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll
index 4f35f667276d8..36b06e06a26b6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v5, ptr %out5"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,37 +13,152 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
-; SSE2:  LV: Found an estimated cost of 45 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4
-; SSE2:  LV: Found an estimated cost of 96 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4
-; SSE2:  LV: Found an estimated cost of 192 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4
+; SSE2:  Cost of 45 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 96 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 192 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 384 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX1:  LV: Found an estimated cost of 28 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX1:  LV: Found an estimated cost of 54 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX1:  LV: Found an estimated cost of 114 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX1:  LV: Found an estimated cost of 228 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4
+; AVX1:  Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 54 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 114 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 228 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 456 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX2:  LV: Found an estimated cost of 11 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX2:  LV: Found an estimated cost of 15 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX2:  LV: Found an estimated cost of 39 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX2:  LV: Found an estimated cost of 78 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4
+; AVX2:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 15 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 39 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 78 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 456 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 8 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 17 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 25 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 51 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 102 for VF 32 For instruction: store i32 %v5, ptr %out5, align 4
-; AVX512:  LV: Found an estimated cost of 204 for VF 64 For instruction: store i32 %v5, ptr %out5, align 4
+; AVX512:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 25 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 102 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 204 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll
index 9a4f580a7a2ca..49aa508224965 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v6, ptr %out6"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,36 +13,172 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
-; SSE2:  LV: Found an estimated cost of 51 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
-; SSE2:  LV: Found an estimated cost of 108 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
-; SSE2:  LV: Found an estimated cost of 216 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
+; SSE2:  Cost of 51 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 108 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 216 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 432 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX1:  LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX1:  LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX1:  LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX1:  LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4
+; AVX1:  Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 64 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 133 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 266 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 532 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX2:  LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX2:  LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX2:  LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX2:  LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4
+; AVX2:  Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 64 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 133 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 266 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 532 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 10 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 20 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 40 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 70 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4
-; AVX512:  LV: Found an estimated cost of 140 for VF 32 For instruction: store i32 %v6, ptr %out6, align 4
+; AVX512:  Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll
index 2c4ca9993f53d..55cad6f0afd75 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v7, ptr %out7"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,36 +13,192 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
-; SSE2:  LV: Found an estimated cost of 56 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
-; SSE2:  LV: Found an estimated cost of 120 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
-; SSE2:  LV: Found an estimated cost of 240 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
+; SSE2:  Cost of 56 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 120 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 240 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 480 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX1:  LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX1:  LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX1:  LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX1:  LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4
+; AVX1:  Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 72 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 152 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 304 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 608 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX2:  LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX2:  LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX2:  LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX2:  LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4
+; AVX2:  Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 72 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 152 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 304 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 608 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 11 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 23 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 46 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 92 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4
-; AVX512:  LV: Found an estimated cost of 184 for VF 32 For instruction: store i32 %v7, ptr %out7, align 4
+; AVX512:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 23 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 46 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 184 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 368 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll
index 6fbc678408f6b..9610349875d56 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v1, ptr %out1"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 14 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 28 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 56 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 112 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
+; SSE2:  Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 10 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 22 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 44 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 88 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 176 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8
+; AVX1:  Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 22 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 44 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 88 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 176 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 3 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 6 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 12 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 24 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 48 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8
+; AVX2:  Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:  Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 5 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 64 For instruction: store i64 %v1, ptr %out1, align 8
+; AVX512:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:  Cost of 40 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll
index fe1dad3c3effc..2b1d991b2a9ec 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v2, ptr %out2"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,37 +13,92 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 22 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 44 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 88 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
+; SSE2:  Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 16 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 33 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 66 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 132 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8
+; AVX1:  Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 33 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 66 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 264 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8
+; AVX2:  Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 36 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:  Cost of 264 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 4 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 12 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 24 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 48 for VF 32 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 96 for VF 64 For instruction: store i64 %v2, ptr %out2, align 8
+; AVX512:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:  Cost of 96 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll
index 4ebb2283f9b7f..953f7a94396b4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v3, ptr %out3"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,36 +13,112 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 56 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 112 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
+; SSE2:  Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 112 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 224 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 20 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 44 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 88 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 176 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8
+; AVX1:  Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 12 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8
+; AVX2:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:  Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 5 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 11 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 22 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 44 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 88 for VF 32 For instruction: store i64 %v3, ptr %out3, align 8
+; AVX512:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 44 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 88 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:  Cost of 176 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll
index 79c64ecfe457d..8fc4d18cc706f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v4, ptr %out4"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,33 +13,132 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 38 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 76 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
+; SSE2:  Cost of 38 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 76 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 152 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 304 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
+; AVX1:  Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 55 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 110 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 220 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 440 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
+; AVX2:  Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 55 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 110 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 220 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 440 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 14 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 21 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 35 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 70 for VF 16 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 140 for VF 32 For instruction: store i64 %v4, ptr %out4, align 8
+; AVX512:  Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:  Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll
index 05909fa7a1fb9..4647cbab6b60d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v5, ptr %out5"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,33 +13,152 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
+; SSE2:  Cost of 44 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 88 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 176 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 352 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 30 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 66 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 132 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
+; AVX1:  Cost of 30 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 528 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 11 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 21 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 42 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
+; AVX2:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 42 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:  Cost of 528 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 17 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 25 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 51 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 102 for VF 16 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 204 for VF 32 For instruction: store i64 %v5, ptr %out5, align 8
+; AVX512:  Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 25 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 102 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 204 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:  Cost of 408 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll
index 881c7867614b7..b0a8727383234 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v6, ptr %out6"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,33 +13,172 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 50 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 100 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
+; SSE2:  Cost of 50 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 100 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 200 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 400 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
+; AVX1:  Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 77 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 154 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 308 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 616 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
+; AVX2:  Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 77 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 154 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 308 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 616 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 70 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 140 for VF 16 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 280 for VF 32 For instruction: store i64 %v6, ptr %out6, align 8
+; AVX512:  Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 140 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 280 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:  Cost of 560 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll
index 572f90b41debc..4c6710e763c9e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v., ptr %out."
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i64 %v\., ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at <badref>,|WIDEN store|REPLICATE store ir<%v\.>)" --filter "^  store ir<%v.?> to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -13,164 +13,188 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 56 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
-; SSE2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
-; SSE2:  LV: Found an estimated cost of 112 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
+; SSE2:  Cost of 56 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 112 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 224 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 448 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX1:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX1:  LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
+; AVX1:  Cost of 40 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 88 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 176 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 352 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 704 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX2:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX2:  LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
+; AVX2:  Cost of 40 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 88 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 176 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 352 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 704 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
 ;
 ; AVX512-LABEL: 'test'
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 23 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 46 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
+; AVX512:  Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512:    store ir<%v> to index 0
+; AVX512:    store ir<%v1> to index 1
+; AVX512:    store ir<%v2> to index 2
+; AVX512:    store ir<%v3> to index 3
+; AVX512:    store ir<%v4> to index 4
+; AVX512:    store ir<%v5> to index 5
+; AVX512:    store ir<%v6> to index 6
+; AVX512:    store ir<%v7> to index 7
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out0>, ir<%v>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out1>, ir<%v1>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out2>, ir<%v2>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out3>, ir<%v3>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out4>, ir<%v4>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out5>, ir<%v5>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out6>, ir<%v6>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out7>, ir<%v7>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out0>, ir<%v>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out1>, ir<%v1>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out2>, ir<%v2>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out3>, ir<%v3>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out4>, ir<%v4>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out5>, ir<%v5>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out6>, ir<%v6>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out7>, ir<%v7>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out0>, ir<%v>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out1>, ir<%v1>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out2>, ir<%v2>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out3>, ir<%v3>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out4>, ir<%v4>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out5>, ir<%v5>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out6>, ir<%v6>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out7>, ir<%v7>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out0>, ir<%v>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out1>, ir<%v1>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out2>, ir<%v2>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out3>, ir<%v3>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out4>, ir<%v4>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out5>, ir<%v5>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out6>, ir<%v6>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out7>, ir<%v7>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll
index 87b5f4d3858dc..91c82cc8fd5b7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v1, ptr %out1"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,106 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
-; SSE2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1
-; SSE2:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1
-; SSE2:  LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
-; SSE2:  LV: Found an estimated cost of 126 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
+; SSE2:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:  Cost of 126 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX1:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX1:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX1:  LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX1:  LV: Found an estimated cost of 66 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX1:  LV: Found an estimated cost of 134 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1
+; AVX1:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 66 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:  Cost of 134 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX2:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX2:  LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX2:  LV: Found an estimated cost of 6 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1
+; AVX2:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 6 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 4 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 5 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512DQ:  LV: Found an estimated cost of 270 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1
+; AVX512DQ:  Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 5 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 270 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512BW:  LV: Found an estimated cost of 4 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512BW:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512BW:  LV: Found an estimated cost of 4 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512BW:  LV: Found an estimated cost of 8 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512BW:  LV: Found an estimated cost of 20 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1
-; AVX512BW:  LV: Found an estimated cost of 41 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1
+; AVX512BW:  Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 41 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll
index 9a7503b63f9ff..779134134fe39 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v2, ptr %out2"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,132 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; SSE2:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
-; SSE2:  LV: Found an estimated cost of 52 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
-; SSE2:  LV: Found an estimated cost of 101 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
-; SSE2:  LV: Found an estimated cost of 204 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
+; SSE2:  Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 101 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:  Cost of 204 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX1:  LV: Found an estimated cost of 27 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX1:  LV: Found an estimated cost of 53 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX1:  LV: Found an estimated cost of 100 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX1:  LV: Found an estimated cost of 201 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1
+; AVX1:  Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 27 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 53 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 100 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:  Cost of 201 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX2:  LV: Found an estimated cost of 7 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX2:  LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX2:  LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1
+; AVX2:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 7 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 14 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 15 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512DQ:  LV: Found an estimated cost of 405 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1
+; AVX512DQ:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 14 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 15 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 405 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512BW:  LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512BW:  LV: Found an estimated cost of 8 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512BW:  LV: Found an estimated cost of 16 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512BW:  LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512BW:  LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1
-; AVX512BW:  LV: Found an estimated cost of 29 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1
+; AVX512BW:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 29 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll
index e072d707e23e5..38b86e812f2dc 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v3, ptr %out3"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,158 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
-; SSE2:  LV: Found an estimated cost of 28 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1
-; SSE2:  LV: Found an estimated cost of 60 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1
-; SSE2:  LV: Found an estimated cost of 124 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
-; SSE2:  LV: Found an estimated cost of 252 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
+; SSE2:  Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 60 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 124 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:  Cost of 252 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX1:  LV: Found an estimated cost of 17 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX1:  LV: Found an estimated cost of 33 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX1:  LV: Found an estimated cost of 66 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX1:  LV: Found an estimated cost of 132 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX1:  LV: Found an estimated cost of 268 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1
+; AVX1:  Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 33 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 66 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:  Cost of 268 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX2:  LV: Found an estimated cost of 5 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX2:  LV: Found an estimated cost of 5 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX2:  LV: Found an estimated cost of 5 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX2:  LV: Found an estimated cost of 10 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1
+; AVX2:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 5 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 5 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 5 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 9 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 14 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512DQ:  LV: Found an estimated cost of 540 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1
+; AVX512DQ:  Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 14 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 540 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512BW:  LV: Found an estimated cost of 11 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512BW:  LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512BW:  LV: Found an estimated cost of 11 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512BW:  LV: Found an estimated cost of 12 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512BW:  LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1
-; AVX512BW:  LV: Found an estimated cost of 28 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1
+; AVX512BW:  Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 28 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll
index 1c079204cc3bb..d2245df5aa9b0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v4, ptr %out4"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,180 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; SSE2:  LV: Found an estimated cost of 44 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
-; SSE2:  LV: Found an estimated cost of 87 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
-; SSE2:  LV: Found an estimated cost of 178 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
-; SSE2:  LV: Found an estimated cost of 360 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
+; SSE2:  Cost of 44 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 87 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 178 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:  Cost of 360 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX1:  LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX1:  LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX1:  LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX1:  LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX1:  LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
+; AVX1:  Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 166 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:  Cost of 335 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX2:  LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX2:  LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX2:  LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX2:  LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX2:  LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
+; AVX2:  Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 166 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:  Cost of 335 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 87 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 336 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512DQ:  LV: Found an estimated cost of 675 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1
+; AVX512DQ:  Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 87 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 166 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 336 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 675 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512BW:  LV: Found an estimated cost of 15 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512BW:  LV: Found an estimated cost of 31 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512BW:  LV: Found an estimated cost of 79 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512BW:  LV: Found an estimated cost of 158 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512BW:  LV: Found an estimated cost of 237 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
-; AVX512BW:  LV: Found an estimated cost of 395 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1
+; AVX512BW:  Cost of 15 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 31 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 79 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 158 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 237 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 395 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll
index 0b260d46b9173..8c603581aa08e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v5, ptr %out5"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,210 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
-; SSE2:  LV: Found an estimated cost of 49 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1
-; SSE2:  LV: Found an estimated cost of 98 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
-; SSE2:  LV: Found an estimated cost of 201 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
-; SSE2:  LV: Found an estimated cost of 408 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
+; SSE2:  Cost of 49 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 98 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 201 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:  Cost of 408 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX1:  LV: Found an estimated cost of 27 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX1:  LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX1:  LV: Found an estimated cost of 100 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX1:  LV: Found an estimated cost of 198 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX1:  LV: Found an estimated cost of 402 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1
+; AVX1:  Cost of 27 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 53 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 100 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 198 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:  Cost of 402 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX2:  LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX2:  LV: Found an estimated cost of 12 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX2:  LV: Found an estimated cost of 18 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX2:  LV: Found an estimated cost of 30 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX2:  LV: Found an estimated cost of 96 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1
+; AVX2:  Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 30 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    ir<%v> = load from index 0
+; AVX2:  Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 12 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 19 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 29 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 93 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512DQ:  LV: Found an estimated cost of 810 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1
+; AVX512DQ:  Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 19 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 29 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 93 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 810 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512BW:  LV: Found an estimated cost of 18 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512BW:  LV: Found an estimated cost of 38 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512BW:  LV: Found an estimated cost of 98 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512BW:  LV: Found an estimated cost of 197 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512BW:  LV: Found an estimated cost of 295 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1
-; AVX512BW:  LV: Found an estimated cost of 591 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1
+; AVX512BW:  Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 38 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 98 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 197 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 295 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 591 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll
index b69559c6dae62..d10cab37a697d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v6, ptr %out6"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,231 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; SSE2:  LV: Found an estimated cost of 57 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
-; SSE2:  LV: Found an estimated cost of 112 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
-; SSE2:  LV: Found an estimated cost of 225 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
-; SSE2:  LV: Found an estimated cost of 456 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
+; SSE2:  Cost of 57 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 112 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 225 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:  Cost of 456 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX1:  LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX1:  LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX1:  LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX1:  LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX1:  LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
+; AVX1:  Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 119 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 232 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:  Cost of 469 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX2:  LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX2:  LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX2:  LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX2:  LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX2:  LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
+; AVX2:  Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 119 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 232 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:  Cost of 469 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 234 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 470 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512DQ:  LV: Found an estimated cost of 945 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1
+; AVX512DQ:  Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:  Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 121 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 234 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 470 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 945 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512BW:  LV: Found an estimated cost of 22 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512BW:  LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512BW:  LV: Found an estimated cost of 118 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512BW:  LV: Found an estimated cost of 236 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512BW:  LV: Found an estimated cost of 472 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
-; AVX512BW:  LV: Found an estimated cost of 826 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1
+; AVX512BW:  Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 118 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 236 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 472 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 826 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll
index 078528b58f6f7..2f8d2ff1cc8d2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v7, ptr %out7"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^  store ir<.* to index"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
@@ -14,50 +14,258 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @test() {
 ; SSE2-LABEL: 'test'
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
-; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
-; SSE2:  LV: Found an estimated cost of 56 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
-; SSE2:  LV: Found an estimated cost of 120 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
-; SSE2:  LV: Found an estimated cost of 248 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
-; SSE2:  LV: Found an estimated cost of 504 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
+; SSE2:  Cost of 56 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 120 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 248 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
+; SSE2:  Cost of 504 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; SSE2:    store ir<%v> to index 0
+; SSE2:    store ir<%v1> to index 1
+; SSE2:    store ir<%v2> to index 2
+; SSE2:    store ir<%v3> to index 3
+; SSE2:    store ir<%v4> to index 4
+; SSE2:    store ir<%v5> to index 5
+; SSE2:    store ir<%v6> to index 6
+; SSE2:    store ir<%v7> to index 7
 ;
 ; AVX1-LABEL: 'test'
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX1:  LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX1:  LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX1:  LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX1:  LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX1:  LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
+; AVX1:  Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
+; AVX1:  Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX1:    store ir<%v> to index 0
+; AVX1:    store ir<%v1> to index 1
+; AVX1:    store ir<%v2> to index 2
+; AVX1:    store ir<%v3> to index 3
+; AVX1:    store ir<%v4> to index 4
+; AVX1:    store ir<%v5> to index 5
+; AVX1:    store ir<%v6> to index 6
+; AVX1:    store ir<%v7> to index 7
 ;
 ; AVX2-LABEL: 'test'
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX2:  LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX2:  LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX2:  LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX2:  LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX2:  LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
+; AVX2:  Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
+; AVX2:  Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX2:    store ir<%v> to index 0
+; AVX2:    store ir<%v1> to index 1
+; AVX2:    store ir<%v2> to index 2
+; AVX2:    store ir<%v3> to index 3
+; AVX2:    store ir<%v4> to index 4
+; AVX2:    store ir<%v5> to index 5
+; AVX2:    store ir<%v6> to index 6
+; AVX2:    store ir<%v7> to index 7
 ;
 ; AVX512DQ-LABEL: 'test'
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512DQ:  LV: Found an estimated cost of 1080 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1
+; AVX512DQ:  Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
+; AVX512DQ:    ir<%v> = load from index 0
+; AVX512DQ:  Cost of 1080 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512DQ:    store ir<%v> to index 0
+; AVX512DQ:    store ir<%v1> to index 1
+; AVX512DQ:    store ir<%v2> to index 2
+; AVX512DQ:    store ir<%v3> to index 3
+; AVX512DQ:    store ir<%v4> to index 4
+; AVX512DQ:    store ir<%v5> to index 5
+; AVX512DQ:    store ir<%v6> to index 6
+; AVX512DQ:    store ir<%v7> to index 7
 ;
 ; AVX512BW-LABEL: 'test'
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512BW:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512BW:  LV: Found an estimated cost of 25 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512BW:  LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512BW:  LV: Found an estimated cost of 137 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512BW:  LV: Found an estimated cost of 275 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512BW:  LV: Found an estimated cost of 550 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
-; AVX512BW:  LV: Found an estimated cost of 1100 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1
+; AVX512BW:  Cost of 25 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 53 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 137 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 275 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 550 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
+; AVX512BW:    ir<%v> = load from index 0
+; AVX512BW:  Cost of 1100 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
+; AVX512BW:    store ir<%v> to index 0
+; AVX512BW:    store ir<%v1> to index 1
+; AVX512BW:    store ir<%v2> to index 2
+; AVX512BW:    store ir<%v3> to index 3
+; AVX512BW:    store ir<%v4> to index 4
+; AVX512BW:    store ir<%v5> to index 5
+; AVX512BW:    store ir<%v6> to index 6
+; AVX512BW:    store ir<%v7> to index 7
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll
index 665b0c3bac040..3f3ca1b215dde 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%i[2,4] = load i16, ptr %[a-zA-Z0-7]+, align 2"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%i[2,4] = load i16, ptr %[a-zA-Z0-7]+, align 2" --filter "Cost of [0-9]+ for VF [0-9]+: (REPLICATE ir<%i[24]> = load|INTERLEAVE-GROUP with factor [0-9]+ at %i[24])" --filter "^  ir<.* = load from index"
 ; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED
 ; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED
 ; REQUIRES: asserts
@@ -22,26 +22,30 @@ define void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-LABEL: 'test1'
 ; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
 ; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 6 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 6 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 13 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 13 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 27 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 27 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 55 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 55 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED:  Cost of 6 for VF 2: REPLICATE ir<%i2> = load ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 6 for VF 2: REPLICATE ir<%i4> = load ir<%arrayidx7>
+; DISABLED_MASKED_STRIDED:  Cost of 13 for VF 4: REPLICATE ir<%i2> = load ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 13 for VF 4: REPLICATE ir<%i4> = load ir<%arrayidx7>
+; DISABLED_MASKED_STRIDED:  Cost of 27 for VF 8: REPLICATE ir<%i2> = load ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 27 for VF 8: REPLICATE ir<%i4> = load ir<%arrayidx7>
+; DISABLED_MASKED_STRIDED:  Cost of 55 for VF 16: REPLICATE ir<%i2> = load ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 55 for VF 16: REPLICATE ir<%i4> = load ir<%arrayidx7>
 ;
 ; ENABLED_MASKED_STRIDED-LABEL: 'test1'
 ; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
 ; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 8 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 11 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 11 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 17 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>
+; ENABLED_MASKED_STRIDED:    ir<%i2> = load from index 0
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 1
+; ENABLED_MASKED_STRIDED:  Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>
+; ENABLED_MASKED_STRIDED:    ir<%i2> = load from index 0
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 1
+; ENABLED_MASKED_STRIDED:  Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>
+; ENABLED_MASKED_STRIDED:    ir<%i2> = load from index 0
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 1
+; ENABLED_MASKED_STRIDED:  Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>
+; ENABLED_MASKED_STRIDED:    ir<%i2> = load from index 0
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 1
 ;
 entry:
   br label %for.body
@@ -79,26 +83,30 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no
 ; DISABLED_MASKED_STRIDED-LABEL: 'test2'
 ; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
 ; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 2: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V)
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 2: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V)
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 4: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V)
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 4: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V)
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 8: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V)
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 8: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V)
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 16: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V)
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 16: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V)
 ;
 ; ENABLED_MASKED_STRIDED-LABEL: 'test2'
 ; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
 ; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 8 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 11 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 11 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 17 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED:  Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8:%[0-9]+]]>
+; ENABLED_MASKED_STRIDED:    ir<%i2> = load from index 0
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 1
+; ENABLED_MASKED_STRIDED:  Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8]]>
+; ENABLED_MASKED_STRIDED:    ir<%i2> = load from index 0
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 1
+; ENABLED_MASKED_STRIDED:  Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8]]>
+; ENABLED_MASKED_STRIDED:    ir<%i2> = load from index 0
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 1
+; ENABLED_MASKED_STRIDED:  Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8]]>
+; ENABLED_MASKED_STRIDED:    ir<%i2> = load from index 0
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 1
 ;
 entry:
   %cmp15 = icmp sgt i32 %numPoints, 0
@@ -146,26 +154,22 @@ define void @test(ptr noalias nocapture %points, ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-LABEL: 'test'
 ; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx, align 2
 ; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 2: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V)
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 4: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V)
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 8: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V)
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 16: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V)
 ;
 ; ENABLED_MASKED_STRIDED-LABEL: 'test'
 ; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx, align 2
 ; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 7 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 9 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 9 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 14 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED:  Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1>
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 0
+; ENABLED_MASKED_STRIDED:  Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1>
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 0
+; ENABLED_MASKED_STRIDED:  Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1>
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 0
+; ENABLED_MASKED_STRIDED:  Cost of 14 for VF 16: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1>
+; ENABLED_MASKED_STRIDED:    ir<%i4> = load from index 0
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll
index c2c04ce6f5ff5..3623358228bd4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %[0,2], ptr %[a-zA-Z0-7]+, align 2"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i16 %[0,2], ptr %[a-zA-Z0-7]+, align 2" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i16 %[02]|REPLICATE store ir<%[02]>|INTERLEAVE-GROUP with factor [0-9]+ at <badref>)"
 ; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED
 ; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED
 ; REQUIRES: asserts
@@ -22,30 +22,29 @@ define void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-LABEL: 'test1'
 ; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
 ; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED:  Cost of 6 for VF 2: REPLICATE store ir<%0>, ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 6 for VF 2: REPLICATE store ir<%2>, ir<%arrayidx7>
+; DISABLED_MASKED_STRIDED:  Cost of 13 for VF 4: REPLICATE store ir<%0>, ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 13 for VF 4: REPLICATE store ir<%2>, ir<%arrayidx7>
+; DISABLED_MASKED_STRIDED:  Cost of 27 for VF 8: REPLICATE store ir<%0>, ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 27 for VF 8: REPLICATE store ir<%2>, ir<%arrayidx7>
+; DISABLED_MASKED_STRIDED:  Cost of 55 for VF 16: REPLICATE store ir<%0>, ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 55 for VF 16: REPLICATE store ir<%2>, ir<%arrayidx7>
 ;
 ; ENABLED_MASKED_STRIDED-LABEL: 'test1'
 ; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
 ; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED:  Cost of 6 for VF 2: REPLICATE store ir<%0>, ir<%arrayidx2>
+; ENABLED_MASKED_STRIDED:  Cost of 6 for VF 2: REPLICATE store ir<%2>, ir<%arrayidx7>
+; ENABLED_MASKED_STRIDED:  Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>
+; ENABLED_MASKED_STRIDED:    store ir<%0> to index 0
+; ENABLED_MASKED_STRIDED:    store ir<%2> to index 1
+; ENABLED_MASKED_STRIDED:  Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>
+; ENABLED_MASKED_STRIDED:    store ir<%0> to index 0
+; ENABLED_MASKED_STRIDED:    store ir<%2> to index 1
+; ENABLED_MASKED_STRIDED:  Cost of 27 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>
+; ENABLED_MASKED_STRIDED:    store ir<%0> to index 0
+; ENABLED_MASKED_STRIDED:    store ir<%2> to index 1
 ;
 entry:
   br label %for.body
@@ -83,30 +82,30 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no
 ; DISABLED_MASKED_STRIDED-LABEL: 'test2'
 ; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
 ; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 8 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 17 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 35 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 71 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED:  Cost of 8 for VF 2: REPLICATE store ir<%0>, ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 2: REPLICATE store ir<%2>, ir<%arrayidx7>
+; DISABLED_MASKED_STRIDED:  Cost of 17 for VF 4: REPLICATE store ir<%0>, ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 4: REPLICATE store ir<%2>, ir<%arrayidx7>
+; DISABLED_MASKED_STRIDED:  Cost of 35 for VF 8: REPLICATE store ir<%0>, ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 8: REPLICATE store ir<%2>, ir<%arrayidx7>
+; DISABLED_MASKED_STRIDED:  Cost of 71 for VF 16: REPLICATE store ir<%0>, ir<%arrayidx2>
+; DISABLED_MASKED_STRIDED:  Cost of 3000000 for VF 16: REPLICATE store ir<%2>, ir<%arrayidx7>
 ;
 ; ENABLED_MASKED_STRIDED-LABEL: 'test2'
 ; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
 ; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED:  Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>, vp<[[VP8:%[0-9]+]]>
+; ENABLED_MASKED_STRIDED:    store ir<%0> to index 0
+; ENABLED_MASKED_STRIDED:    store ir<%2> to index 1
+; ENABLED_MASKED_STRIDED:  Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>, vp<[[VP8]]>
+; ENABLED_MASKED_STRIDED:    store ir<%0> to index 0
+; ENABLED_MASKED_STRIDED:    store ir<%2> to index 1
+; ENABLED_MASKED_STRIDED:  Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>, vp<[[VP8]]>
+; ENABLED_MASKED_STRIDED:    store ir<%0> to index 0
+; ENABLED_MASKED_STRIDED:    store ir<%2> to index 1
+; ENABLED_MASKED_STRIDED:  Cost of 27 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>, vp<[[VP8]]>
+; ENABLED_MASKED_STRIDED:    store ir<%0> to index 0
+; ENABLED_MASKED_STRIDED:    store ir<%2> to index 1
 ;
 entry:
   %cmp15 = icmp sgt i32 %numPoints, 0
@@ -153,19 +152,17 @@ for.end:
 define void @test(ptr noalias nocapture %points, ptr noalias nocapture readonly %x, ptr noalias nocapture readnone %y) {
 ; DISABLED_MASKED_STRIDED-LABEL: 'test'
 ; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, ptr %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, ptr %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, ptr %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED:  Cost of 2 for VF 2: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED:  Cost of 4 for VF 4: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED:  Cost of 8 for VF 8: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED:  Cost of 16 for VF 16: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
 ;
 ; ENABLED_MASKED_STRIDED-LABEL: 'test'
 ; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, ptr %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, ptr %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, ptr %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED:  Cost of 2 for VF 2: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED:  Cost of 4 for VF 4: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED:  Cost of 8 for VF 8: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED:  Cost of 16 for VF 16: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll
index 5e67bd57754e4..986d7b7104d88 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %valB, ptr %out, align 4"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i32 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i32 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1
@@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2:  Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE2:  Cost of 5 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE2:  Cost of 11 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE2:  Cost of 22 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4
 ;
 ; SSE42-LABEL: 'test'
 ; SSE42:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42:  Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE42:  Cost of 4 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE42:  Cost of 8 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE42:  Cost of 16 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1:  Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4
+; AVX1:  Cost of 4 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4
+; AVX1:  Cost of 8 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4
+; AVX1:  Cost of 17 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4
+; AVX1:  Cost of 34 for VF 32: profitable to scalarize store i32 %valB, ptr %out, align 4
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2:  Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4
+; AVX2:  Cost of 4 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4
+; AVX2:  Cost of 8 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4
+; AVX2:  Cost of 17 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4
+; AVX2:  Cost of 34 for VF 32: profitable to scalarize store i32 %valB, ptr %out, align 4
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 5 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 10 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 72 for VF 64 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512:  Cost of 5 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 10 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out>, ir<%valB>, ir<%canStore>
+; AVX512:  Cost of 18 for VF 16: WIDEN store ir<%out>, ir<%valB>, ir<%canStore>
+; AVX512:  Cost of 36 for VF 32: WIDEN store ir<%out>, ir<%valB>, ir<%canStore>
+; AVX512:  Cost of 72 for VF 64: WIDEN store ir<%out>, ir<%valB>, ir<%canStore>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll
index faa2aa43d4934..56d3f973b3177 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll
@@ -1,8 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %valB, ptr %out, align 8"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i64 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i64 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1
-; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2,AVX2-NOFAST
+; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512
 
@@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2:  Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE2:  Cost of 5 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE2:  Cost of 10 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE2:  Cost of 20 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8
 ;
 ; SSE42-LABEL: 'test'
 ; SSE42:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42:  Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE42:  Cost of 4 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE42:  Cost of 8 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE42:  Cost of 16 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1:  Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8
+; AVX1:  Cost of 4 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8
+; AVX1:  Cost of 9 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8
+; AVX1:  Cost of 18 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8
+; AVX1:  Cost of 36 for VF 32: profitable to scalarize store i64 %valB, ptr %out, align 8
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2-NOFAST: LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2:  Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8
+; AVX2:  Cost of 4 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8
+; AVX2:  Cost of 9 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8
+; AVX2:  Cost of 18 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8
+; AVX2:  Cost of 36 for VF 32: profitable to scalarize store i64 %valB, ptr %out, align 8
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 5 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 11 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 80 for VF 64 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512:  Cost of 5 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 11 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out>, ir<%valB>, ir<%canStore>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out>, ir<%valB>, ir<%canStore>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out>, ir<%valB>, ir<%canStore>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out>, ir<%valB>, ir<%canStore>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll
index 1d51a32a520a9..89df198fc74a9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %valB, ptr %out, align 2"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i16 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i16 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1
@@ -17,35 +17,35 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test(ptr %C) {
 ; SSE-LABEL: 'test'
 ; SSE:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
-; SSE:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
-; SSE:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
-; SSE:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
-; SSE:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
+; SSE:  Cost of 2 for VF 2: profitable to scalarize store i16 %valB, ptr %out, align 2
+; SSE:  Cost of 4 for VF 4: profitable to scalarize store i16 %valB, ptr %out, align 2
+; SSE:  Cost of 8 for VF 8: profitable to scalarize store i16 %valB, ptr %out, align 2
+; SSE:  Cost of 16 for VF 16: profitable to scalarize store i16 %valB, ptr %out, align 2
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
-; AVX1:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
-; AVX1:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
-; AVX1:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
-; AVX1:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
-; AVX1:  LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1:  Cost of 2 for VF 2: profitable to scalarize store i16 %valB, ptr %out, align 2
+; AVX1:  Cost of 4 for VF 4: profitable to scalarize store i16 %valB, ptr %out, align 2
+; AVX1:  Cost of 8 for VF 8: profitable to scalarize store i16 %valB, ptr %out, align 2
+; AVX1:  Cost of 16 for VF 16: profitable to scalarize store i16 %valB, ptr %out, align 2
+; AVX1:  Cost of 33 for VF 32: profitable to scalarize store i16 %valB, ptr %out, align 2
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
-; AVX2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
-; AVX2:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
-; AVX2:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
-; AVX2:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
-; AVX2:  LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2:  Cost of 2 for VF 2: profitable to scalarize store i16 %valB, ptr %out, align 2
+; AVX2:  Cost of 4 for VF 4: profitable to scalarize store i16 %valB, ptr %out, align 2
+; AVX2:  Cost of 8 for VF 8: profitable to scalarize store i16 %valB, ptr %out, align 2
+; AVX2:  Cost of 16 for VF 16: profitable to scalarize store i16 %valB, ptr %out, align 2
+; AVX2:  Cost of 33 for VF 32: profitable to scalarize store i16 %valB, ptr %out, align 2
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 1 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 1 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 1 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 2 for VF 64 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512:  Cost of 2 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 2 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 1 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 1 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 1 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 2 for VF 64: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll
index f011d06d319bb..0111cc162b4de 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %valB, ptr %out, align 4"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i32 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i32 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1
@@ -17,42 +17,42 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test(ptr %C) {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 5 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 11 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 22 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2:  Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE2:  Cost of 5 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE2:  Cost of 11 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE2:  Cost of 22 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4
 ;
 ; SSE42-LABEL: 'test'
 ; SSE42:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42:  Cost of 2 for VF 2: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE42:  Cost of 4 for VF 4: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE42:  Cost of 8 for VF 8: profitable to scalarize store i32 %valB, ptr %out, align 4
+; SSE42:  Cost of 16 for VF 16: profitable to scalarize store i32 %valB, ptr %out, align 4
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 9 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 32 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1:  Cost of 9 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX1:  Cost of 8 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX1:  Cost of 8 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX1:  Cost of 16 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX1:  Cost of 32 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 9 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 32 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2:  Cost of 9 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX2:  Cost of 8 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX2:  Cost of 8 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX2:  Cost of 16 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX2:  Cost of 32 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 1 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 2 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 4 for VF 64 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512:  Cost of 2 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 1 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 1 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 1 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 2 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 4 for VF 64: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll
index c004b16ae207d..afa821586bb1c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %valB, ptr %out, align 8"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i64 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i64 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1
@@ -17,42 +17,42 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test(ptr %C) {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2:  Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE2:  Cost of 5 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE2:  Cost of 10 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE2:  Cost of 20 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8
 ;
 ; SSE42-LABEL: 'test'
 ; SSE42:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42:  Cost of 2 for VF 2: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE42:  Cost of 4 for VF 4: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE42:  Cost of 8 for VF 8: profitable to scalarize store i64 %valB, ptr %out, align 8
+; SSE42:  Cost of 16 for VF 16: profitable to scalarize store i64 %valB, ptr %out, align 8
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 16 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 32 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 64 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1:  Cost of 8 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX1:  Cost of 8 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX1:  Cost of 16 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX1:  Cost of 32 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX1:  Cost of 64 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 16 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 32 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 64 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2:  Cost of 8 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX2:  Cost of 8 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX2:  Cost of 16 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX2:  Cost of 32 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX2:  Cost of 64 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 1 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 2 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 4 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 8 for VF 64 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512:  Cost of 1 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 1 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 1 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 2 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 4 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
+; AVX512:  Cost of 8 for VF 64: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore> (!alias.scope {{.*}}, !noalias {{.*}})
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll
index 8bbe624849783..862a572643895 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %valB, ptr %out, align 1"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i8 %valB, ptr %out" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i8 %valB|WIDEN store .*, ir<%valB>|REPLICATE store ir<%valB>)"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1
@@ -17,42 +17,42 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test(ptr %C) {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
-; SSE2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
-; SSE2:  LV: Found an estimated cost of 5 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
-; SSE2:  LV: Found an estimated cost of 11 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
-; SSE2:  LV: Found an estimated cost of 23 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
+; SSE2:  Cost of 2 for VF 2: profitable to scalarize store i8 %valB, ptr %out, align 1
+; SSE2:  Cost of 5 for VF 4: profitable to scalarize store i8 %valB, ptr %out, align 1
+; SSE2:  Cost of 11 for VF 8: profitable to scalarize store i8 %valB, ptr %out, align 1
+; SSE2:  Cost of 23 for VF 16: profitable to scalarize store i8 %valB, ptr %out, align 1
 ;
 ; SSE42-LABEL: 'test'
 ; SSE42:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
-; SSE42:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
-; SSE42:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
-; SSE42:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
-; SSE42:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
+; SSE42:  Cost of 2 for VF 2: profitable to scalarize store i8 %valB, ptr %out, align 1
+; SSE42:  Cost of 4 for VF 4: profitable to scalarize store i8 %valB, ptr %out, align 1
+; SSE42:  Cost of 8 for VF 8: profitable to scalarize store i8 %valB, ptr %out, align 1
+; SSE42:  Cost of 16 for VF 16: profitable to scalarize store i8 %valB, ptr %out, align 1
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
-; AVX1:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
-; AVX1:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
-; AVX1:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
-; AVX1:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
-; AVX1:  LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1:  Cost of 2 for VF 2: profitable to scalarize store i8 %valB, ptr %out, align 1
+; AVX1:  Cost of 4 for VF 4: profitable to scalarize store i8 %valB, ptr %out, align 1
+; AVX1:  Cost of 8 for VF 8: profitable to scalarize store i8 %valB, ptr %out, align 1
+; AVX1:  Cost of 16 for VF 16: profitable to scalarize store i8 %valB, ptr %out, align 1
+; AVX1:  Cost of 32 for VF 32: profitable to scalarize store i8 %valB, ptr %out, align 1
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
-; AVX2:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
-; AVX2:  LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
-; AVX2:  LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
-; AVX2:  LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
-; AVX2:  LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2:  Cost of 2 for VF 2: profitable to scalarize store i8 %valB, ptr %out, align 1
+; AVX2:  Cost of 4 for VF 4: profitable to scalarize store i8 %valB, ptr %out, align 1
+; AVX2:  Cost of 8 for VF 8: profitable to scalarize store i8 %valB, ptr %out, align 1
+; AVX2:  Cost of 16 for VF 16: profitable to scalarize store i8 %valB, ptr %out, align 1
+; AVX2:  Cost of 32 for VF 32: profitable to scalarize store i8 %valB, ptr %out, align 1
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 1 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 1 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 1 for VF 64 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512:  Cost of 2 for VF 2: WIDEN store vp<[[VP7:%[0-9]+]]>, ir<%valB>, ir<%canStore>
+; AVX512:  Cost of 2 for VF 4: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore>
+; AVX512:  Cost of 2 for VF 8: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore>
+; AVX512:  Cost of 1 for VF 16: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore>
+; AVX512:  Cost of 1 for VF 32: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore>
+; AVX512:  Cost of 1 for VF 64: WIDEN store vp<[[VP7]]>, ir<%valB>, ir<%canStore>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll
index bd6b16831d09c..aa89dbbcd72ac 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %valB, ptr %out, align 2"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i16 %valB, ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (WIDEN store|REPLICATE store ir<%valB>)"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1
@@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
-; SSE2:  LV: Found an estimated cost of 28 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
-; SSE2:  LV: Found an estimated cost of 56 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
-; SSE2:  LV: Found an estimated cost of 112 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
-; SSE2:  LV: Found an estimated cost of 224 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
+; SSE2:  Cost of 28 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 56 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 112 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 224 for VF 16: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; SSE42-LABEL: 'test'
 ; SSE42:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
-; SSE42:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
-; SSE42:  LV: Found an estimated cost of 52 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
-; SSE42:  LV: Found an estimated cost of 104 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
-; SSE42:  LV: Found an estimated cost of 208 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
+; SSE42:  Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 52 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 104 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 208 for VF 16: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
-; AVX1:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
-; AVX1:  LV: Found an estimated cost of 53 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
-; AVX1:  LV: Found an estimated cost of 106 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
-; AVX1:  LV: Found an estimated cost of 213 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
-; AVX1:  LV: Found an estimated cost of 426 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1:  Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 53 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 106 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 213 for VF 16: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 426 for VF 32: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
-; AVX2:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
-; AVX2:  LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
-; AVX2:  LV: Found an estimated cost of 26 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
-; AVX2:  LV: Found an estimated cost of 53 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
-; AVX2:  LV: Found an estimated cost of 106 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2:  Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 26 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 53 for VF 16: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 106 for VF 32: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 111 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
-; AVX512:  LV: Found an estimated cost of 222 for VF 64 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512:  Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 27 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 55 for VF 16: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 111 for VF 32: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 222 for VF 64: REPLICATE store ir<%valB>, ir<%out>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll
index de76eb0782c0d..816ddfbadc4d1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %valB, ptr %out, align 4"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i32 %valB, ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (WIDEN store|REPLICATE store ir<%valB>)"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1
@@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 29 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 59 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 118 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; SSE2:  LV: Found an estimated cost of 236 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2:  Cost of 29 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 59 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 118 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 236 for VF 16: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; SSE42-LABEL: 'test'
 ; SSE42:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 52 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 104 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; SSE42:  LV: Found an estimated cost of 208 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42:  Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 52 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 104 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 208 for VF 16: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 53 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 107 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 214 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
-; AVX1:  LV: Found an estimated cost of 428 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1:  Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 53 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 107 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 214 for VF 16: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 428 for VF 32: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 27 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 54 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
-; AVX2:  LV: Found an estimated cost of 108 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2:  Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 27 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 54 for VF 16: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 108 for VF 32: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
-; AVX512:  LV: Found an estimated cost of 72 for VF 64 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512:  Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out>, ir<%valB>
+; AVX512:  Cost of 18 for VF 16: WIDEN store ir<%out>, ir<%valB>
+; AVX512:  Cost of 36 for VF 32: WIDEN store ir<%out>, ir<%valB>
+; AVX512:  Cost of 72 for VF 64: WIDEN store ir<%out>, ir<%valB>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll
index c69711d1b71d5..64c41c2a31311 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %valB, ptr %out, align 8"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i64 %valB, ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (WIDEN store|REPLICATE store ir<%valB>)"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1
@@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 29 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 58 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 116 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; SSE2:  LV: Found an estimated cost of 232 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2:  Cost of 29 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 58 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 116 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 232 for VF 16: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; SSE42-LABEL: 'test'
 ; SSE42:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 52 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 104 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; SSE42:  LV: Found an estimated cost of 208 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42:  Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 52 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 104 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 208 for VF 16: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 54 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 108 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 216 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
-; AVX1:  LV: Found an estimated cost of 432 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1:  Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 54 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 108 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 216 for VF 16: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 432 for VF 32: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 28 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 56 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
-; AVX2:  LV: Found an estimated cost of 112 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2:  Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 14 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 28 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 56 for VF 16: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 112 for VF 32: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
-; AVX512:  LV: Found an estimated cost of 80 for VF 64 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512:  Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 14 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 10 for VF 8: WIDEN store ir<%out>, ir<%valB>
+; AVX512:  Cost of 20 for VF 16: WIDEN store ir<%out>, ir<%valB>
+; AVX512:  Cost of 40 for VF 32: WIDEN store ir<%out>, ir<%valB>
+; AVX512:  Cost of 80 for VF 64: WIDEN store ir<%out>, ir<%valB>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll
index 4f62383d67927..1be3a62186e16 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %valB, ptr %out, align 1"
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i8 %valB, ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (WIDEN store|REPLICATE store ir<%valB>)"
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx  --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1
@@ -18,42 +18,42 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test() {
 ; SSE2-LABEL: 'test'
 ; SSE2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
-; SSE2:  LV: Found an estimated cost of 29 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
-; SSE2:  LV: Found an estimated cost of 59 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
-; SSE2:  LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
-; SSE2:  LV: Found an estimated cost of 239 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
+; SSE2:  Cost of 29 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 59 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 119 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; SSE2:  Cost of 239 for VF 16: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; SSE42-LABEL: 'test'
 ; SSE42:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
-; SSE42:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
-; SSE42:  LV: Found an estimated cost of 52 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
-; SSE42:  LV: Found an estimated cost of 104 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
-; SSE42:  LV: Found an estimated cost of 208 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
+; SSE42:  Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 52 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 104 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; SSE42:  Cost of 208 for VF 16: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX1-LABEL: 'test'
 ; AVX1:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
-; AVX1:  LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
-; AVX1:  LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
-; AVX1:  LV: Found an estimated cost of 106 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
-; AVX1:  LV: Found an estimated cost of 212 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
-; AVX1:  LV: Found an estimated cost of 425 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1:  Cost of 26 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 53 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 106 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 212 for VF 16: REPLICATE store ir<%valB>, ir<%out>
+; AVX1:  Cost of 425 for VF 32: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX2-LABEL: 'test'
 ; AVX2:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
-; AVX2:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
-; AVX2:  LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
-; AVX2:  LV: Found an estimated cost of 26 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
-; AVX2:  LV: Found an estimated cost of 52 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
-; AVX2:  LV: Found an estimated cost of 105 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2:  Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 26 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 52 for VF 16: REPLICATE store ir<%valB>, ir<%out>
+; AVX2:  Cost of 105 for VF 32: REPLICATE store ir<%valB>, ir<%out>
 ;
 ; AVX512-LABEL: 'test'
 ; AVX512:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 27 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 54 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 109 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
-; AVX512:  LV: Found an estimated cost of 219 for VF 64 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512:  Cost of 6 for VF 2: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 13 for VF 4: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 27 for VF 8: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 54 for VF 16: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 109 for VF 32: REPLICATE store ir<%valB>, ir<%out>
+; AVX512:  Cost of 219 for VF 64: REPLICATE store ir<%valB>, ir<%out>
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll
index 46150a6955566..3e1c0d47b5f57 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll
@@ -10,11 +10,11 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @load_i16_stride2() {
 ;CHECK-LABEL: load_i16_stride2
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 16 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 3 for VF 32 For instruction:   %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 3 for VF 32: INTERLEAVE-GROUP with factor 2 at %1,
 entry:
   br label %for.body
 
@@ -36,11 +36,11 @@ for.end:                                          ; preds = %for.body
 define void @load_i16_stride3() {
 ;CHECK-LABEL: load_i16_stride3
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 3 for VF 16 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 5 for VF 32 For instruction:   %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 5 for VF 32: INTERLEAVE-GROUP with factor 3 at %1,
 entry:
   br label %for.body
 
@@ -62,11 +62,11 @@ for.end:                                          ; preds = %for.body
 define void @load_i16_stride4() {
 ;CHECK-LABEL: load_i16_stride4
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 3 for VF 16 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 8 for VF 32 For instruction:   %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 4 at %1,
 entry:
   br label %for.body
 
@@ -88,11 +88,11 @@ for.end:                                          ; preds = %for.body
 define void @load_i16_stride5() {
 ;CHECK-LABEL: load_i16_stride5
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 3 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 5 for VF 16 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 10 for VF 32 For instruction:   %1 = load
+;CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 10 for VF 32: INTERLEAVE-GROUP with factor 5 at %1,
 entry:
   br label %for.body
 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll
index c88bbffca8213..2aa6686a5dcc3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll
@@ -10,10 +10,10 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @load_int_stride2() {
 ;CHECK-LABEL: load_int_stride2
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 16 For instruction:  %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 1 for VF 8: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at %1,
 entry:
   br label %for.body
 
@@ -35,10 +35,10 @@ for.end:                                          ; preds = %for.body
 define void @load_int_stride3() {
 ;CHECK-LABEL: load_int_stride3
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 3 for VF 16 For instruction:  %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 3 at %1,
 entry:
   br label %for.body
 
@@ -60,10 +60,10 @@ for.end:                                          ; preds = %for.body
 define void @load_int_stride4() {
 ;CHECK-LABEL: load_int_stride4
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 5 for VF 16 For instruction:  %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 4 at %1,
 entry:
   br label %for.body
 
@@ -85,10 +85,10 @@ for.end:                                          ; preds = %for.body
 define void @load_int_stride5() {
 ;CHECK-LABEL: load_int_stride5
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 3 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 6 for VF 16 For instruction:  %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 5 at %1,
 entry:
   br label %for.body
 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll
index 21f6128f6fca5..fece9ddd655d9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll
@@ -10,9 +10,9 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @load_i64_stride2() {
 ;CHECK-LABEL: load_i64_stride2
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %1,
 entry:
   br label %for.body
 
@@ -34,9 +34,9 @@ for.end:                                          ; preds = %for.body
 define void @load_i64_stride3() {
 ;CHECK-LABEL: load_i64_stride3
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 3 for VF 8 For instruction:   %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 3 at %1,
 entry:
   br label %for.body
 
@@ -58,9 +58,9 @@ for.end:                                          ; preds = %for.body
 define void @load_i64_stride4() {
 ;CHECK-LABEL: load_i64_stride4
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 5 for VF 8 For instruction:   %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 4 at %1,
 entry:
   br label %for.body
 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll
index 022a1374f1bbe..ae0a2f793feb3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll
@@ -10,12 +10,12 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @load_i8_stride2() {
 ;CHECK-LABEL: load_i8_stride2
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 4 for VF 16 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 8 for VF 32 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 20 for VF 64 For instruction:   %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 1 for VF 8: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at %1,
+;CHECK: Cost of 20 for VF 64: INTERLEAVE-GROUP with factor 2 at %1,
 entry:
   br label %for.body
 
@@ -37,12 +37,12 @@ for.end:                                          ; preds = %for.body
 define void @load_i8_stride3() {
 ;CHECK-LABEL: load_i8_stride3
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 4 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 13 for VF 16 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 16 for VF 32 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 25 for VF 64 For instruction:   %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at %1,
+;CHECK: Cost of 25 for VF 64: INTERLEAVE-GROUP with factor 3 at %1,
 entry:
   br label %for.body
 
@@ -64,12 +64,12 @@ for.end:                                          ; preds = %for.body
 define void @load_i8_stride4() {
 ;CHECK-LABEL: load_i8_stride4
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 4 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 8 for VF 16 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 20 for VF 32 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 59 for VF 64 For instruction:   %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 4 at %1,
+;CHECK: Cost of 59 for VF 64: INTERLEAVE-GROUP with factor 4 at %1,
 entry:
   br label %for.body
 
@@ -91,12 +91,12 @@ for.end:                                          ; preds = %for.body
 define void @load_i8_stride5() {
 ;CHECK-LABEL: load_i8_stride5
 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 4 for VF 4 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 8 for VF 8 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 20 for VF 16 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 39 for VF 32 For instruction:   %1 = load
-;CHECK: Found an estimated cost of 78 for VF 64 For instruction:   %1 = load
+;CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 20 for VF 16: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 39 for VF 32: INTERLEAVE-GROUP with factor 5 at %1,
+;CHECK: Cost of 78 for VF 64: INTERLEAVE-GROUP with factor 5 at %1,
 entry:
   br label %for.body
 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
index 3c39a1d4e2463..83955f1a188bd 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
@@ -12,7 +12,7 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst)
 ; CHECK:  Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
 ; CHECK:  Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK:  Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<%0>
-; CHECK:  Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
+; CHECK:  Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>, vp<%0>
 ; CHECK:  Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
 ; CHECK:  Cost of 0 for VF 2: vp<%5> = vector-pointer inbounds ir<%g.src>
 ; CHECK:  Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5>
@@ -25,12 +25,20 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst)
 ; CHECK:  Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
 ; CHECK:  Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%2>
 ; CHECK:  Cost of 0 for VF 2: vector loop backedge
+; CHECK:  Cost of 0 for VF 2: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%2>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK:  Cost of 0 for VF 2: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK:  Cost of 0 for VF 2: IR %g.src = getelementptr inbounds i64, ptr %src, i64 %iv
+; CHECK:  Cost of 0 for VF 2: IR %l = load i64, ptr %g.src, align 8
+; CHECK:  Cost of 0 for VF 2: IR %iv.4 = add nuw nsw i64 %iv, 4
+; CHECK:  Cost of 0 for VF 2: IR %c = icmp ule i64 %l, 128
+; CHECK:  Cost of 0 for VF 2: EMIT vp<%cmp.n> = icmp eq ir<32>, vp<%2>
+; CHECK:  Cost of 0 for VF 2: EMIT branch-on-cond vp<%cmp.n>
 ; CHECK:  Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK:  Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
 ; CHECK:  Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
 ; CHECK:  Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK:  Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<%0>
-; CHECK:  Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
+; CHECK:  Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>, vp<%0>
 ; CHECK:  Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
 ; CHECK:  Cost of 0 for VF 4: vp<%5> = vector-pointer inbounds ir<%g.src>
 ; CHECK:  Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5>
@@ -43,9 +51,16 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst)
 ; CHECK:  Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
 ; CHECK:  Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%2>
 ; CHECK:  Cost of 0 for VF 4: vector loop backedge
-; CHECK:  Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
-; CHECK:  Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
-; CHECK:  Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
+; CHECK:  Cost of 0 for VF 4: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%2>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK:  Cost of 0 for VF 4: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK:  Cost of 0 for VF 4: IR %g.src = getelementptr inbounds i64, ptr %src, i64 %iv
+; CHECK:  Cost of 0 for VF 4: IR %l = load i64, ptr %g.src, align 8
+; CHECK:  Cost of 0 for VF 4: IR %iv.4 = add nuw nsw i64 %iv, 4
+; CHECK:  Cost of 0 for VF 4: IR %c = icmp ule i64 %l, 128
+; CHECK:  Cost of 0 for VF 4: EMIT vp<%cmp.n> = icmp eq ir<32>, vp<%2>
+; CHECK:  Cost of 0 for VF 4: EMIT branch-on-cond vp<%cmp.n>
+; CHECK:  Cost of 0 for VF 4: EMIT vp<%cmp.n> = icmp eq ir<32>, vp<%2>
+; CHECK:  Cost of 0 for VF 4: EMIT branch-on-cond vp<%cmp.n>
 ;
 entry:
   br label %loop.header