; RUN: opt -disable-output -passes='print-access-info' %s 2>&1 | FileCheck %s

;; Each function below holds one loop. The RUN line prints the access-info
;; report for the file and the expected report for every function is listed
;; immediately before that function's definition.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

;; Expected report for forked_ptrs_simple: the accesses are safe with run-time
;; checks, and the %select address is compared against the %Dest group twice,
;; once through a group bounded by %Base1 and once through a group bounded by
;; %Base2.

; CHECK-LABEL: function 'forked_ptrs_simple':
; CHECK-NEXT:  loop:
; CHECK-NEXT:    Memory dependences are safe with run-time checks
; CHECK-NEXT:    Dependences:
; CHECK-NEXT:    Run-time memory checks:
; CHECK-NEXT:    Check 0:
; CHECK-NEXT:      Comparing group ([[G1:.+]]):
; CHECK-NEXT:        %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
; CHECK-NEXT:        %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
; CHECK-NEXT:      Against group ([[G2:.+]]):
; CHECK-NEXT:        %select = select i1 %cmp, float* %gep.1, float* %gep.2
; CHECK-NEXT:    Check 1:
; CHECK-NEXT:      Comparing group ([[G1]]):
; CHECK-NEXT:        %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
; CHECK-NEXT:        %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
; CHECK-NEXT:      Against group ([[G3:.+]]):
; CHECK-NEXT:        %select = select i1 %cmp, float* %gep.1, float* %gep.2
; CHECK-NEXT:    Grouped accesses:
; CHECK-NEXT:      Group [[G1]]
; CHECK-NEXT:        (Low: %Dest High: (400 + %Dest))
; CHECK-NEXT:          Member: {%Dest,+,4}<nuw><%loop>
; CHECK-NEXT:          Member: {%Dest,+,4}<nuw><%loop>
; CHECK-NEXT:      Group [[G2]]:
; CHECK-NEXT:        (Low: %Base1 High: (400 + %Base1))
; CHECK-NEXT:          Member: {%Base1,+,4}<nw><%loop>
; CHECK-NEXT:      Group [[G3]]:
; CHECK-NEXT:        (Low: %Base2 High: (400 + %Base2))
; CHECK-NEXT:          Member: {%Base2,+,4}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:    SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:    Expressions re-written:

;; Single-block loop that runs %iv from 0 until %iv.next reaches 100. Every
;; iteration loads Dest[iv], compares it with 0.0, loads from either Base1[iv]
;; or Base2[iv] depending on that comparison, and stores the result back to
;; Dest[iv].
define void @forked_ptrs_simple(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* %Dest) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
  %l.Dest = load float, float* %gep.Dest
  %cmp = fcmp une float %l.Dest, 0.0
  ;; Two candidate source addresses: different bases, same index %iv.
  %gep.1 = getelementptr inbounds float, float* %Base1, i64 %iv
  %gep.2 = getelementptr inbounds float, float* %Base2, i64 %iv
  ;; The address actually loaded from is chosen per iteration.
  %select = select i1 %cmp, float* %gep.1, float* %gep.2
  %sink = load float, float* %select, align 4
  ;; The store reuses %gep.Dest, the same address the first load read.
  store float %sink, float* %gep.Dest, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 100
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}


; CHECK-LABEL: function 'forked_ptrs_different_base_same_offset':
; CHECK-NEXT:  for.body:
; CHECK-NEXT:    Report: cannot identify array bounds
; CHECK-NEXT:    Dependences:
; CHECK-NEXT:    Run-time memory checks:
; CHECK-NEXT:    Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:    SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:    Expressions re-written:

;;;; Derived from the following C code
;; void forked_ptrs_different_base_same_offset(float *A, float *B, float *C, int *D) {
;;   for (int i=0; i<100; i++) {
;;     if (D[i] != 0) {
;;       C[i] = A[i];
;;     } else {
;;       C[i] = B[i];
;;     }
;;   }
;; }

;; Here the select chooses the base pointer itself (Base2 when Preds[i] is
;; zero, Base1 otherwise) and a single GEP then applies the index, so the
;; loaded address is a GEP whose base operand is a select.
define dso_local void @forked_ptrs_different_base_same_offset(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ;; Read the per-iteration predicate.
  %pred.addr = getelementptr inbounds i32, i32* %Preds, i64 %i
  %pred = load i32, i32* %pred.addr, align 4
  %pred.is.zero = icmp eq i32 %pred, 0
  ;; Pick the source array, then index it with %i.
  %src.base = select i1 %pred.is.zero, float* %Base2, float* %Base1
  %src.addr = getelementptr inbounds float, float* %src.base, i64 %i
  %val = load float, float* %src.addr, align 4
  %dst.addr = getelementptr inbounds float, float* %Dest, i64 %i
  store float %val, float* %dst.addr, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 100
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; CHECK-LABEL: function 'forked_ptrs_same_base_different_offset':
; CHECK-NEXT:  for.body:
; CHECK-NEXT:    Report: cannot identify array bounds
; CHECK-NEXT:    Dependences:
; CHECK-NEXT:    Run-time memory checks:
; CHECK-NEXT:    Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:    SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:    Expressions re-written:

;;;; Derived from the following C code
;; void forked_ptrs_same_base_different_offset(float *A, float *B, int *C) {
;;   int offset;
;;   for (int i = 0; i < 100; i++) {
;;     if (C[i] != 0)
;;       offset = i;
;;     else
;;       offset = i+1;
;;     B[i] = A[offset];
;;   }
;; }

;; Here the select chooses the 32-bit index (i+1 when Preds[i] is zero,
;; i otherwise); the chosen index is zero-extended and applied to one base.
define dso_local void @forked_ptrs_same_base_different_offset(float* nocapture readonly %Base, float* nocapture %Dest, i32* nocapture readonly %Preds) {
entry:
  br label %for.body

for.body:
  ;; 64-bit and 32-bit copies of the loop counter, advanced in lockstep.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %iv32 = phi i32 [ 0, %entry ], [ %iv32.next, %for.body ]
  %pred.addr = getelementptr inbounds i32, i32* %Preds, i64 %iv
  %pred = load i32, i32* %pred.addr, align 4
  %pred.is.zero = icmp eq i32 %pred, 0
  %iv.next = add nuw nsw i64 %iv, 1
  %iv32.next = add nuw nsw i32 %iv32, 1
  %iv.trunc = trunc i64 %iv to i32
  ;; Choose the index, widen it, and load from the single base.
  %offset = select i1 %pred.is.zero, i32 %iv32.next, i32 %iv.trunc
  %offset.ext = zext i32 %offset to i64
  %src.addr = getelementptr inbounds float, float* %Base, i64 %offset.ext
  %val = load float, float* %src.addr, align 4
  %dst.addr = getelementptr inbounds float, float* %Dest, i64 %iv
  store float %val, float* %dst.addr, align 4
  %done = icmp eq i64 %iv.next, 100
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

;;;; Cases that can be handled by a forked pointer but are not currently allowed.

; CHECK-LABEL: function 'forked_ptrs_uniform_and_strided_forks':
; CHECK-NEXT:  for.body:
; CHECK-NEXT:    Report: cannot identify array bounds
; CHECK-NEXT:    Dependences:
; CHECK-NEXT:    Run-time memory checks:
; CHECK-NEXT:    Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:    SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:    Expressions re-written:

;;;; Derived from forked_ptrs_same_base_different_offset with a manually
;;;; added uniform offset and a mul to provide a stride

;; The select chooses between the constant index 4 (when Preds[i] is zero)
;; and the index 3*i; the chosen index is sign-extended and applied to %Base.
define dso_local void @forked_ptrs_uniform_and_strided_forks(float* nocapture readonly %Base, float* nocapture %Dest, i32* nocapture readonly %Preds) {
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ;; The 32-bit counter only feeds its own increment in this function.
  %iv32 = phi i32 [ 0, %entry ], [ %iv32.next, %for.body ]
  %pred.addr = getelementptr inbounds i32, i32* %Preds, i64 %iv
  %pred = load i32, i32* %pred.addr, align 4
  %pred.is.zero = icmp eq i32 %pred, 0
  %iv.next = add nuw nsw i64 %iv, 1
  %iv32.next = add nuw nsw i32 %iv32, 1
  %iv.trunc = trunc i64 %iv to i32
  %strided = mul i32 %iv.trunc, 3
  %offset = select i1 %pred.is.zero, i32 4, i32 %strided
  %offset.ext = sext i32 %offset to i64
  %src.addr = getelementptr inbounds float, float* %Base, i64 %offset.ext
  %val = load float, float* %src.addr, align 4
  %dst.addr = getelementptr inbounds float, float* %Dest, i64 %iv
  store float %val, float* %dst.addr, align 4
  %done = icmp eq i64 %iv.next, 100
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; CHECK-LABEL: function 'forked_ptrs_gather_and_contiguous_forks':
; CHECK-NEXT:  for.body:
; CHECK-NEXT:    Report: cannot identify array bounds
; CHECK-NEXT:    Dependences:
; CHECK-NEXT:    Run-time memory checks:
; CHECK-NEXT:    Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:    SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:    Expressions re-written:

;;;; Derived from forked_ptrs_same_base_different_offset with a gather
;;;; added using Preds as an index array in addition to the per-iteration
;;;; condition.
;;;; NOTE(review): this function takes two bases (%Base1, %Base2), like
;;;; forked_ptrs_different_base_same_offset; confirm which test it was
;;;; actually derived from.

;; The select chooses between Base2[i] (when Preds[i] is zero) and
;; Base1[Preds[i]], where the loaded predicate value is sign-extended and
;; used as the index.
define dso_local void @forked_ptrs_gather_and_contiguous_forks(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) {
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %pred.addr = getelementptr inbounds i32, i32* %Preds, i64 %iv
  %pred = load i32, i32* %pred.addr, align 4
  %pred.is.zero = icmp eq i32 %pred, 0
  ;; Contiguous candidate: indexed by the induction variable.
  %contig.addr = getelementptr inbounds float, float* %Base2, i64 %iv
  ;; Gather candidate: indexed by the value just loaded from Preds.
  %pred.ext = sext i32 %pred to i64
  %gather.addr = getelementptr inbounds float, float* %Base1, i64 %pred.ext
  %src.addr = select i1 %pred.is.zero, float* %contig.addr, float* %gather.addr
  %val = load float, float* %src.addr, align 4
  %dst.addr = getelementptr inbounds float, float* %Dest, i64 %iv
  store float %val, float* %dst.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 100
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}
