; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=0 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=SCALABLE
; RUN: opt < %s -loop-vectorize -scalable-vectorization=off -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=FIXEDLEN
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=0 -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=TF-SCALABLE
; RUN: opt < %s -loop-vectorize -scalable-vectorization=off -riscv-v-vector-bits-min=-1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=TF-FIXEDLEN

;; Exercises LoopVectorize on loads whose address is loop-invariant ("uniform")
;; on RISC-V V, under four configurations: scalable VF, fixed-width VF, and
;; each of those with tail folding (predicate-dont-vectorize).  stderr from
;; opt is redirected to %t and otherwise ignored.  The CHECK lines below are
;; machine-generated; regenerate with update_test_checks.py rather than
;; editing them by hand.

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

;; Every iteration loads from the same address %b and stores to a[iv]:
;; the load should be scalarized and splatted, not widened per-lane.
define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; SCALABLE-LABEL: @uniform_load(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 8
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP3]], i32 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; SCALABLE-NEXT: store <vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
; SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; SCALABLE: for.end:
; SCALABLE-NEXT: ret void
;
; FIXEDLEN-LABEL: @uniform_load(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXEDLEN-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXEDLEN-NEXT: [[TMP3:%.*]] = load i64, ptr [[B]], align 8
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 2
; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP7]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; FIXEDLEN: for.end:
; FIXEDLEN-NEXT: ret void
;
; TF-SCALABLE-LABEL: @uniform_load(
; TF-SCALABLE-NEXT: entry:
; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
; TF-SCALABLE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-SCALABLE: vector.ph:
; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 8
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i32 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; TF-SCALABLE: middle.block:
; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-SCALABLE: scalar.ph:
; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; TF-SCALABLE: for.body:
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; TF-SCALABLE: for.end:
; TF-SCALABLE-NEXT: ret void
;
; TF-FIXEDLEN-LABEL: @uniform_load(
; TF-FIXEDLEN-NEXT: entry:
; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-FIXEDLEN: vector.ph:
; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-FIXEDLEN: vector.body:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i32 0
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; TF-FIXEDLEN: middle.block:
; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-FIXEDLEN: scalar.ph:
; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN: for.body:
; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; TF-FIXEDLEN: for.end:
; TF-FIXEDLEN-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %v = load i64, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

;; Same as @uniform_load, but the loaded value escapes the loop (it is
;; returned), so the vectorized versions must also produce a scalar value
;; for the LCSSA phi in for.end.
define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; SCALABLE-LABEL: @uniform_load_outside_use(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 8
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP3]], i32 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; SCALABLE-NEXT: store <vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
; SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; SCALABLE: for.end:
; SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; SCALABLE-NEXT: ret i64 [[V_LCSSA]]
;
; FIXEDLEN-LABEL: @uniform_load_outside_use(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXEDLEN-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXEDLEN-NEXT: [[TMP3:%.*]] = load i64, ptr [[B]], align 8
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 2
; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP7]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; FIXEDLEN: for.end:
; FIXEDLEN-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; FIXEDLEN-NEXT: ret i64 [[V_LCSSA]]
;
; TF-SCALABLE-LABEL: @uniform_load_outside_use(
; TF-SCALABLE-NEXT: entry:
; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; TF-SCALABLE: for.body:
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; TF-SCALABLE: for.end:
; TF-SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT: ret i64 [[V_LCSSA]]
;
; TF-FIXEDLEN-LABEL: @uniform_load_outside_use(
; TF-FIXEDLEN-NEXT: entry:
; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-FIXEDLEN: vector.ph:
; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-FIXEDLEN: vector.body:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i32 0
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; TF-FIXEDLEN: middle.block:
; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-FIXEDLEN: scalar.ph:
; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN: for.body:
; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; TF-FIXEDLEN: for.end:
; TF-FIXEDLEN-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
; TF-FIXEDLEN-NEXT: ret i64 [[V_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %v = load i64, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %v
}


;; The uniform load only executes when iv > 10, so a vectorized version
;; needs a mask (here a masked gather of the splatted pointer) or must stay
;; scalar, as the checks below show per configuration.
define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; SCALABLE-LABEL: @conditional_uniform_load(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; SCALABLE-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]]
; SCALABLE: do_load:
; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
; SCALABLE-NEXT: br label [[LATCH]]
; SCALABLE: latch:
; SCALABLE-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ]
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; SCALABLE: for.end:
; SCALABLE-NEXT: ret void
;
; FIXEDLEN-LABEL: @conditional_uniform_load(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i32 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i32 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT2]], <2 x ptr> poison, <2 x i32> zeroinitializer
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], <i64 10, i64 10>
; FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp ugt <2 x i64> [[STEP_ADD]], <i64 10, i64 10>
; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[BROADCAST_SPLAT]], i32 8, <2 x i1> [[TMP2]], <2 x i64> undef)
; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[BROADCAST_SPLAT3]], i32 8, <2 x i1> [[TMP3]], <2 x i64> undef)
; FIXEDLEN-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
; FIXEDLEN-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[WIDE_MASKED_GATHER]], <2 x i64> zeroinitializer
; FIXEDLEN-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP3]], <2 x i64> [[WIDE_MASKED_GATHER4]], <2 x i64> zeroinitializer
; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
; FIXEDLEN-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP8]], align 8
; FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 2
; FIXEDLEN-NEXT: store <2 x i64> [[PREDPHI5]], ptr [[TMP9]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
; FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; FIXEDLEN-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]]
; FIXEDLEN: do_load:
; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; FIXEDLEN-NEXT: br label [[LATCH]]
; FIXEDLEN: latch:
; FIXEDLEN-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ]
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; FIXEDLEN: for.end:
; FIXEDLEN-NEXT: ret void
;
; TF-SCALABLE-LABEL: @conditional_uniform_load(
; TF-SCALABLE-NEXT: entry:
; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; TF-SCALABLE: for.body:
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; TF-SCALABLE-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]]
; TF-SCALABLE: do_load:
; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
; TF-SCALABLE-NEXT: br label [[LATCH]]
; TF-SCALABLE: latch:
; TF-SCALABLE-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ]
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; TF-SCALABLE-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; TF-SCALABLE: for.end:
; TF-SCALABLE-NEXT: ret void
;
; TF-FIXEDLEN-LABEL: @conditional_uniform_load(
; TF-FIXEDLEN-NEXT: entry:
; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-FIXEDLEN: vector.ph:
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i32 0
; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-FIXEDLEN: vector.body:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], <i64 10, i64 10>
; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[BROADCAST_SPLAT]], i32 8, <2 x i1> [[TMP1]], <2 x i64> undef)
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], <i1 true, i1 true>
; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[WIDE_MASKED_GATHER]], <2 x i64> zeroinitializer
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; TF-FIXEDLEN-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; TF-FIXEDLEN: middle.block:
; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-FIXEDLEN: scalar.ph:
; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN: for.body:
; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; TF-FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; TF-FIXEDLEN-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]]
; TF-FIXEDLEN: do_load:
; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: br label [[LATCH]]
; TF-FIXEDLEN: latch:
; TF-FIXEDLEN-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ]
; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-FIXEDLEN-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; TF-FIXEDLEN: for.end:
; TF-FIXEDLEN-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %cmp = icmp ugt i64 %iv, 10
  br i1 %cmp, label %do_load, label %latch
do_load:
  %v = load i64, ptr %b, align 8
  br label %latch

latch:
  %phi = phi i64 [0, %for.body], [%v, %do_load]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %phi, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

;; As @uniform_load, but the uniform load is only align 1.
define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; SCALABLE-LABEL: @uniform_load_unaligned(
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 1
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP3]], i32 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; SCALABLE-NEXT: store <vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
; SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SCALABLE: scalar.ph:
; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
; SCALABLE: for.body:
; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; SCALABLE: for.end:
; SCALABLE-NEXT: ret void
;
; FIXEDLEN-LABEL: @uniform_load_unaligned(
; FIXEDLEN-NEXT: entry:
; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN: vector.ph:
; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXEDLEN: vector.body:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXEDLEN-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 1
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXEDLEN-NEXT: [[TMP3:%.*]] = load i64, ptr [[B]], align 1
; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 2
; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP7]], align 8
; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; FIXEDLEN: middle.block:
; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN: scalar.ph:
; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
; FIXEDLEN: for.body:
; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1
; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; FIXEDLEN: for.end:
;
FIXEDLEN-NEXT: ret void 584; 585; TF-SCALABLE-LABEL: @uniform_load_unaligned( 586; TF-SCALABLE-NEXT: entry: 587; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] 588; TF-SCALABLE: for.body: 589; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 590; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 1 591; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] 592; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 593; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 594; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 595; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] 596; TF-SCALABLE: for.end: 597; TF-SCALABLE-NEXT: ret void 598; 599; TF-FIXEDLEN-LABEL: @uniform_load_unaligned( 600; TF-FIXEDLEN-NEXT: entry: 601; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 602; TF-FIXEDLEN: vector.ph: 603; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] 604; TF-FIXEDLEN: vector.body: 605; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 606; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 607; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 1 608; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i32 0 609; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer 610; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] 611; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 612; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 613; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 614; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 615; TF-FIXEDLEN-NEXT: br i1 
[[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 616; TF-FIXEDLEN: middle.block: 617; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 618; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 619; TF-FIXEDLEN: scalar.ph: 620; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 621; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] 622; TF-FIXEDLEN: for.body: 623; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 624; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1 625; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 626; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 627; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 628; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 629; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 630; TF-FIXEDLEN: for.end: 631; TF-FIXEDLEN-NEXT: ret void 632; 633entry: 634 br label %for.body 635 636for.body: 637 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 638 %v = load i64, ptr %b, align 1 639 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv 640 store i64 %v, ptr %arrayidx 641 %iv.next = add nuw nsw i64 %iv, 1 642 %exitcond.not = icmp eq i64 %iv.next, 1024 643 br i1 %exitcond.not, label %for.end, label %for.body 644 645for.end: 646 ret void 647} 648 649define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { 650; SCALABLE-LABEL: @uniform_store( 651; SCALABLE-NEXT: entry: 652; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] 653; SCALABLE: for.body: 654; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 655; SCALABLE-NEXT: store i64 [[V:%.*]], ptr [[B:%.*]], align 8 656; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds i64, ptr [[A:%.*]], i64 [[IV]] 657; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 658; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 659; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 660; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] 661; SCALABLE: for.end: 662; SCALABLE-NEXT: ret void 663; 664; FIXEDLEN-LABEL: @uniform_store( 665; FIXEDLEN-NEXT: entry: 666; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 667; FIXEDLEN: vector.ph: 668; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0 669; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer 670; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0 671; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer 672; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] 673; FIXEDLEN: vector.body: 674; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 675; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 676; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 677; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 678; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 679; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 680; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 681; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] 682; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] 683; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 684; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 685; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 686; 
FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 687; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 688; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 689; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 690; FIXEDLEN: middle.block: 691; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 692; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 693; FIXEDLEN: scalar.ph: 694; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 695; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] 696; FIXEDLEN: for.body: 697; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 698; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 699; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 700; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 701; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 702; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 703; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 704; FIXEDLEN: for.end: 705; FIXEDLEN-NEXT: ret void 706; 707; TF-SCALABLE-LABEL: @uniform_store( 708; TF-SCALABLE-NEXT: entry: 709; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] 710; TF-SCALABLE: for.body: 711; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 712; TF-SCALABLE-NEXT: store i64 [[V:%.*]], ptr [[B:%.*]], align 8 713; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] 714; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 715; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 716; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 717; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], 
label [[FOR_BODY]] 718; TF-SCALABLE: for.end: 719; TF-SCALABLE-NEXT: ret void 720; 721; TF-FIXEDLEN-LABEL: @uniform_store( 722; TF-FIXEDLEN-NEXT: entry: 723; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 724; TF-FIXEDLEN: vector.ph: 725; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0 726; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer 727; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] 728; TF-FIXEDLEN: vector.body: 729; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 730; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 731; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 732; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 733; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] 734; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 735; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 736; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 737; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 738; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 739; TF-FIXEDLEN: middle.block: 740; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 741; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 742; TF-FIXEDLEN: scalar.ph: 743; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 744; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] 745; TF-FIXEDLEN: for.body: 746; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 747; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 748; TF-FIXEDLEN-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 749; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 750; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 751; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 752; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 753; TF-FIXEDLEN: for.end: 754; TF-FIXEDLEN-NEXT: ret void 755; 756entry: 757 br label %for.body 758 759for.body: 760 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 761 store i64 %v, ptr %b, align 8 762 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv 763 store i64 %v, ptr %arrayidx 764 %iv.next = add nuw nsw i64 %iv, 1 765 %exitcond.not = icmp eq i64 %iv.next, 1024 766 br i1 %exitcond.not, label %for.end, label %for.body 767 768for.end: 769 ret void 770} 771 772define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { 773; SCALABLE-LABEL: @uniform_store_of_loop_varying( 774; SCALABLE-NEXT: entry: 775; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] 776; SCALABLE: for.body: 777; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 778; SCALABLE-NEXT: store i64 [[IV]], ptr [[B:%.*]], align 8 779; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] 780; SCALABLE-NEXT: store i64 [[V:%.*]], ptr [[ARRAYIDX]], align 8 781; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 782; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 783; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] 784; SCALABLE: for.end: 785; SCALABLE-NEXT: ret void 786; 787; FIXEDLEN-LABEL: @uniform_store_of_loop_varying( 788; FIXEDLEN-NEXT: entry: 789; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 790; FIXEDLEN: vector.ph: 791; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> 
poison, i64 [[V:%.*]], i32 0 792; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer 793; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0 794; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer 795; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] 796; FIXEDLEN: vector.body: 797; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 798; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 799; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 800; FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 801; FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 802; FIXEDLEN-NEXT: store i64 [[TMP0]], ptr [[B:%.*]], align 8 803; FIXEDLEN-NEXT: store i64 [[TMP1]], ptr [[B]], align 8 804; FIXEDLEN-NEXT: store i64 [[TMP2]], ptr [[B]], align 8 805; FIXEDLEN-NEXT: store i64 [[TMP3]], ptr [[B]], align 8 806; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] 807; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] 808; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 809; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 810; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 2 811; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP7]], align 8 812; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 813; FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 814; FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 815; FIXEDLEN: middle.block: 816; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 817; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 818; FIXEDLEN: 
scalar.ph: 819; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 820; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] 821; FIXEDLEN: for.body: 822; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 823; FIXEDLEN-NEXT: store i64 [[IV]], ptr [[B]], align 8 824; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 825; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 826; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 827; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 828; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 829; FIXEDLEN: for.end: 830; FIXEDLEN-NEXT: ret void 831; 832; TF-SCALABLE-LABEL: @uniform_store_of_loop_varying( 833; TF-SCALABLE-NEXT: entry: 834; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] 835; TF-SCALABLE: for.body: 836; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 837; TF-SCALABLE-NEXT: store i64 [[IV]], ptr [[B:%.*]], align 8 838; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] 839; TF-SCALABLE-NEXT: store i64 [[V:%.*]], ptr [[ARRAYIDX]], align 8 840; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 841; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 842; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] 843; TF-SCALABLE: for.end: 844; TF-SCALABLE-NEXT: ret void 845; 846; TF-FIXEDLEN-LABEL: @uniform_store_of_loop_varying( 847; TF-FIXEDLEN-NEXT: entry: 848; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 849; TF-FIXEDLEN: vector.ph: 850; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0 851; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> 
[[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer 852; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] 853; TF-FIXEDLEN: vector.body: 854; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 855; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 856; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 857; TF-FIXEDLEN-NEXT: store i64 [[TMP0]], ptr [[B:%.*]], align 8 858; TF-FIXEDLEN-NEXT: store i64 [[TMP1]], ptr [[B]], align 8 859; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] 860; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 861; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 862; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 863; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 864; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 865; TF-FIXEDLEN: middle.block: 866; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 867; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 868; TF-FIXEDLEN: scalar.ph: 869; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 870; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] 871; TF-FIXEDLEN: for.body: 872; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 873; TF-FIXEDLEN-NEXT: store i64 [[IV]], ptr [[B]], align 8 874; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 875; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 876; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 877; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 878; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop 
[[LOOP13:![0-9]+]] 879; TF-FIXEDLEN: for.end: 880; TF-FIXEDLEN-NEXT: ret void 881; 882entry: 883 br label %for.body 884 885for.body: 886 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 887 store i64 %iv, ptr %b, align 8 888 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv 889 store i64 %v, ptr %arrayidx 890 %iv.next = add nuw nsw i64 %iv, 1 891 %exitcond.not = icmp eq i64 %iv.next, 1024 892 br i1 %exitcond.not, label %for.end, label %for.body 893 894for.end: 895 ret void 896} 897 898define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { 899; SCALABLE-LABEL: @conditional_uniform_store( 900; SCALABLE-NEXT: entry: 901; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] 902; SCALABLE: for.body: 903; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] 904; SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 905; SCALABLE-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]] 906; SCALABLE: do_store: 907; SCALABLE-NEXT: store i64 [[V:%.*]], ptr [[B:%.*]], align 8 908; SCALABLE-NEXT: br label [[LATCH]] 909; SCALABLE: latch: 910; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] 911; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 912; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 913; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 914; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] 915; SCALABLE: for.end: 916; SCALABLE-NEXT: ret void 917; 918; FIXEDLEN-LABEL: @conditional_uniform_store( 919; FIXEDLEN-NEXT: entry: 920; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 921; FIXEDLEN: vector.ph: 922; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0 923; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer 
924; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i32 0 925; FIXEDLEN-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT2]], <2 x ptr> poison, <2 x i32> zeroinitializer 926; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0 927; FIXEDLEN-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT4]], <2 x i64> poison, <2 x i32> zeroinitializer 928; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i32 0 929; FIXEDLEN-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT6]], <2 x ptr> poison, <2 x i32> zeroinitializer 930; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] 931; FIXEDLEN: vector.body: 932; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 933; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 934; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 935; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 936; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 937; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], <i64 10, i64 10> 938; FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp ugt <2 x i64> [[STEP_ADD]], <i64 10, i64 10> 939; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 8, <2 x i1> [[TMP2]]) 940; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT5]], <2 x ptr> [[BROADCAST_SPLAT7]], i32 8, <2 x i1> [[TMP3]]) 941; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] 942; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] 943; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 944; FIXEDLEN-NEXT: 
store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 945; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 2 946; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT5]], ptr [[TMP7]], align 8 947; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 948; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2> 949; FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 950; FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 951; FIXEDLEN: middle.block: 952; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 953; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 954; FIXEDLEN: scalar.ph: 955; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 956; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] 957; FIXEDLEN: for.body: 958; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] 959; FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 960; FIXEDLEN-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]] 961; FIXEDLEN: do_store: 962; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 963; FIXEDLEN-NEXT: br label [[LATCH]] 964; FIXEDLEN: latch: 965; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 966; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 967; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 968; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 969; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 970; FIXEDLEN: for.end: 971; FIXEDLEN-NEXT: ret void 972; 973; TF-SCALABLE-LABEL: @conditional_uniform_store( 974; TF-SCALABLE-NEXT: entry: 975; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] 976; TF-SCALABLE: for.body: 977; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 
[[IV_NEXT:%.*]], [[LATCH:%.*]] ] 978; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 979; TF-SCALABLE-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]] 980; TF-SCALABLE: do_store: 981; TF-SCALABLE-NEXT: store i64 [[V:%.*]], ptr [[B:%.*]], align 8 982; TF-SCALABLE-NEXT: br label [[LATCH]] 983; TF-SCALABLE: latch: 984; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] 985; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 986; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 987; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 988; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] 989; TF-SCALABLE: for.end: 990; TF-SCALABLE-NEXT: ret void 991; 992; TF-FIXEDLEN-LABEL: @conditional_uniform_store( 993; TF-FIXEDLEN-NEXT: entry: 994; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 995; TF-FIXEDLEN: vector.ph: 996; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0 997; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer 998; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i32 0 999; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT1]], <2 x ptr> poison, <2 x i32> zeroinitializer 1000; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] 1001; TF-FIXEDLEN: vector.body: 1002; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1003; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1004; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1005; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], <i64 10, i64 10> 1006; TF-FIXEDLEN-NEXT: call 
void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT]], <2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <2 x i1> [[TMP1]]) 1007; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] 1008; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 1009; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 1010; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1011; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1012; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 1013; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 1014; TF-FIXEDLEN: middle.block: 1015; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 1016; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1017; TF-FIXEDLEN: scalar.ph: 1018; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1019; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] 1020; TF-FIXEDLEN: for.body: 1021; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] 1022; TF-FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 1023; TF-FIXEDLEN-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]] 1024; TF-FIXEDLEN: do_store: 1025; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 1026; TF-FIXEDLEN-NEXT: br label [[LATCH]] 1027; TF-FIXEDLEN: latch: 1028; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 1029; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 1030; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1031; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 1032; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 1033; TF-FIXEDLEN: for.end: 1034; 
; TF-FIXEDLEN-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %cmp = icmp ugt i64 %iv, 10
  br i1 %cmp, label %do_store, label %latch
do_store:
  store i64 %v, ptr %b, align 8
  br label %latch
latch:
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; Every iteration stores %v to the loop-invariant address %b with align 1 --
; under-aligned for i64 (the datalayout declares i64:64) -- in addition to a
; normal store to %a[%iv].  The CHECK lines pin the current behavior: the
; SCALABLE and TF-SCALABLE runs leave the loop scalar, while the FIXEDLEN run
; vectorizes (VF=2, interleave 2) and replicates the uniform align-1 store as
; four scalar stores per vector iteration, and TF-FIXEDLEN (VF=2) as two.
define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; SCALABLE-LABEL: @uniform_store_unaligned(
; SCALABLE-NEXT:  entry:
; SCALABLE-NEXT:    br label [[FOR_BODY:%.*]]
; SCALABLE:       for.body:
; SCALABLE-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; SCALABLE-NEXT:    store i64 [[V:%.*]], ptr [[B:%.*]], align 1
; SCALABLE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; SCALABLE-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; SCALABLE-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; SCALABLE:       for.end:
; SCALABLE-NEXT:    ret void
;
; FIXEDLEN-LABEL: @uniform_store_unaligned(
; FIXEDLEN-NEXT:  entry:
; FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXEDLEN:       vector.ph:
; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
; FIXEDLEN:       vector.body:
; FIXEDLEN-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXEDLEN-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B:%.*]], align 1
; FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B]], align 1
; FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B]], align 1
; FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B]], align 1
; FIXEDLEN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXEDLEN-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXEDLEN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXEDLEN-NEXT:    store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; FIXEDLEN-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; FIXEDLEN-NEXT:    store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8
; FIXEDLEN-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXEDLEN-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; FIXEDLEN:       middle.block:
; FIXEDLEN-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXEDLEN-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXEDLEN:       scalar.ph:
; FIXEDLEN-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXEDLEN-NEXT:    br label [[FOR_BODY:%.*]]
; FIXEDLEN:       for.body:
; FIXEDLEN-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B]], align 1
; FIXEDLEN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXEDLEN-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; FIXEDLEN-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXEDLEN-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXEDLEN-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; FIXEDLEN:       for.end:
; FIXEDLEN-NEXT:    ret void
;
; TF-SCALABLE-LABEL: @uniform_store_unaligned(
; TF-SCALABLE-NEXT:  entry:
; TF-SCALABLE-NEXT:    br label [[FOR_BODY:%.*]]
; TF-SCALABLE:       for.body:
; TF-SCALABLE-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT:    store i64 [[V:%.*]], ptr [[B:%.*]], align 1
; TF-SCALABLE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; TF-SCALABLE-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; TF-SCALABLE-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; TF-SCALABLE:       for.end:
; TF-SCALABLE-NEXT:    ret void
;
; TF-FIXEDLEN-LABEL: @uniform_store_unaligned(
; TF-FIXEDLEN-NEXT:  entry:
; TF-FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TF-FIXEDLEN:       vector.ph:
; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; TF-FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
; TF-FIXEDLEN:       vector.body:
; TF-FIXEDLEN-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TF-FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; TF-FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B:%.*]], align 1
; TF-FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B]], align 1
; TF-FIXEDLEN-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
; TF-FIXEDLEN-NEXT:    store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8
; TF-FIXEDLEN-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; TF-FIXEDLEN-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; TF-FIXEDLEN-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; TF-FIXEDLEN:       middle.block:
; TF-FIXEDLEN-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; TF-FIXEDLEN-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; TF-FIXEDLEN:       scalar.ph:
; TF-FIXEDLEN-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TF-FIXEDLEN-NEXT:    br label [[FOR_BODY:%.*]]
; TF-FIXEDLEN:       for.body:
; TF-FIXEDLEN-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B]], align 1
; TF-FIXEDLEN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; TF-FIXEDLEN-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; TF-FIXEDLEN-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-FIXEDLEN-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; TF-FIXEDLEN-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; TF-FIXEDLEN:       for.end:
; TF-FIXEDLEN-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; Uniform store: the address %b is loop-invariant; align 1 is below the
  ; natural i64 alignment (i64:64 in the datalayout).
  store i64 %v, ptr %b, align 1
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}