; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Opaque external function: calls to it have to stay in vector form, so the
; scalarized operands are gathered before the call and scattered after it.
declare <4 x float> @ext(<4 x float>)
@g = global <4 x float> zeroinitializer

; Loop mixing a vector phi, a vector load/store, <2 x double> shuffles of
; bitcast <4 x float> values, a call to @ext, fcmp and select.
; The two shuffles pick (val[0..1], acc[0..1]) and (val[2..3], acc[2..3]) when
; viewed as floats, so the expected scalar adds are val0+val2, val1+val3,
; acc0+acc2 and acc1+acc3. The call result is re-split with extractelement.
; No alignment is given on the vector load/store; the expected per-element
; alignments are 16, 4, 8, 4.
define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
; CHECK-LABEL: @f1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i32 0
; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i32 1
; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i32 2
; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i32 3
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]]
; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16
; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4
; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8
; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4
; CHECK-NEXT:    [[ADD_I0:%.*]] = fadd float [[VAL_I0]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]]
; CHECK-NEXT:    [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i32 0
; CHECK-NEXT:    [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i32 1
; CHECK-NEXT:    [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i32 2
; CHECK-NEXT:    [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i32 3
; CHECK-NEXT:    [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]])
; CHECK-NEXT:    [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0
; CHECK-NEXT:    [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
; CHECK-NEXT:    [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1
; CHECK-NEXT:    [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
; CHECK-NEXT:    [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2
; CHECK-NEXT:    [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
; CHECK-NEXT:    [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3
; CHECK-NEXT:    [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
; CHECK-NEXT:    store float [[SEL_I0]], float* [[PTR_I0]], align 16
; CHECK-NEXT:    store float [[SEL_I1]], float* [[PTR_I1]], align 4
; CHECK-NEXT:    store float [[SEL_I2]], float* [[PTR_I2]], align 8
; CHECK-NEXT:    store float [[SEL_I3]], float* [[PTR_I3]], align 4
; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
  %val = load <4 x float> , <4 x float> *%ptr
  %dval = bitcast <4 x float> %val to <2 x double>
  %dacc = bitcast <4 x float> %acc to <2 x double>
  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
  <2 x i32> <i32 0, i32 2>
  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
  <2 x i32> <i32 1, i32 3>
  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
  %add = fadd <4 x float> %f1, %f2
  %call = call <4 x float> @ext(<4 x float> %add)
  %cmp = fcmp ogt <4 x float> %call,
  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
  <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, <4 x float> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Integer variant of the loop: <4 x i8> load, sext to <4 x i32>, add, icmp,
; select against a splat of %i, trunc and store.
; The splat is built with insertelement + shufflevector; the assertions expect
; it to disappear entirely, with every scalar select using %i directly.
define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
; CHECK-LABEL: @f2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i32 0
; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i32 1
; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i32 2
; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i32 3
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x i8>, <4 x i8>* [[BASE:%.*]], i32 [[I]]
; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <4 x i8>* [[PTR]] to i8*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i8, i8* [[PTR_I0]], align 4
; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i8, i8* [[PTR_I1]], align 1
; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i8, i8* [[PTR_I2]], align 2
; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i8, i8* [[PTR_I3]], align 1
; CHECK-NEXT:    [[EXT_I0:%.*]] = sext i8 [[VAL_I0]] to i32
; CHECK-NEXT:    [[EXT_I1:%.*]] = sext i8 [[VAL_I1]] to i32
; CHECK-NEXT:    [[EXT_I2:%.*]] = sext i8 [[VAL_I2]] to i32
; CHECK-NEXT:    [[EXT_I3:%.*]] = sext i8 [[VAL_I3]] to i32
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[EXT_I0]], [[ACC_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[EXT_I1]], [[ACC_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[EXT_I2]], [[ACC_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[EXT_I3]], [[ACC_I3]]
; CHECK-NEXT:    [[CMP_I0:%.*]] = icmp slt i32 [[ADD_I0]], -10
; CHECK-NEXT:    [[CMP_I1:%.*]] = icmp slt i32 [[ADD_I1]], -11
; CHECK-NEXT:    [[CMP_I2:%.*]] = icmp slt i32 [[ADD_I2]], -12
; CHECK-NEXT:    [[CMP_I3:%.*]] = icmp slt i32 [[ADD_I3]], -13
; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], i32 [[ADD_I0]], i32 [[I]]
; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], i32 [[ADD_I1]], i32 [[I]]
; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], i32 [[ADD_I2]], i32 [[I]]
; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], i32 [[ADD_I3]], i32 [[I]]
; CHECK-NEXT:    [[TRUNC_I0:%.*]] = trunc i32 [[SEL_I0]] to i8
; CHECK-NEXT:    [[TRUNC_I1:%.*]] = trunc i32 [[SEL_I1]] to i8
; CHECK-NEXT:    [[TRUNC_I2:%.*]] = trunc i32 [[SEL_I2]] to i8
; CHECK-NEXT:    [[TRUNC_I3:%.*]] = trunc i32 [[SEL_I3]] to i8
; CHECK-NEXT:    store i8 [[TRUNC_I0]], i8* [[PTR_I0]], align 4
; CHECK-NEXT:    store i8 [[TRUNC_I1]], i8* [[PTR_I1]], align 1
; CHECK-NEXT:    store i8 [[TRUNC_I2]], i8* [[PTR_I2]], align 2
; CHECK-NEXT:    store i8 [[TRUNC_I3]], i8* [[PTR_I3]], align 1
; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x i8>, <4 x i8> *%base, i32 %i
  %val = load <4 x i8> , <4 x i8> *%ptr
  %ext = sext <4 x i8> %val to <4 x i32>
  %add = add <4 x i32> %ext, %acc
  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
  %single = insertelement <4 x i32> undef, i32 %i, i32 0
  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
  <4 x i32> zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
  %trunc = trunc <4 x i32> %sel to <4 x i8>
  store <4 x i8> %trunc, <4 x i8> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that !tbaa information is preserved.
; Vector load tagged with !tbaa !1, self-add, vector store tagged with
; !tbaa !2 (both metadata nodes are defined elsewhere in the file).
; The assertions expect every scalar load to share one tbaa tag and every
; scalar store to share another, with element alignments 16, 4, 8, 4.
define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f3(
; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa [[TBAA3:![0-9]+]]
; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa [[TBAA3]]
; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    ret void
;
  %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
  ret void
}

; Check that !tbaa.struct information is preserved.
; Same load / self-add / store shape as a plain vector copy-and-double, but
; both memory operations carry !tbaa.struct !5 (node defined elsewhere in the
; file). The assertions expect that tag on every scalar load and store.
define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f4(
; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa.struct !5
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa.struct !5
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa.struct !5
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa.struct !5
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa.struct !5
; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa.struct !5
; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa.struct !5
; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa.struct !5
; CHECK-NEXT:    ret void
;
  %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
  ret void
}

; Check that llvm.access.group information is preserved.
; Loop whose vector load and store carry !llvm.access.group !13 and whose
; back-edge branch carries !llvm.loop !3 (nodes defined elsewhere in the file).
; The assertions expect an access-group tag on every scalar load and store and
; the loop metadata to remain on the branch.
define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[THIS_SRC:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[SRC:%.*]], i32 [[INDEX]]
; CHECK-NEXT:    [[THIS_SRC_I0:%.*]] = bitcast <4 x i32>* [[THIS_SRC]] to i32*
; CHECK-NEXT:    [[THIS_SRC_I1:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 1
; CHECK-NEXT:    [[THIS_SRC_I2:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 2
; CHECK-NEXT:    [[THIS_SRC_I3:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 3
; CHECK-NEXT:    [[THIS_DST:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[DST:%.*]], i32 [[INDEX]]
; CHECK-NEXT:    [[THIS_DST_I0:%.*]] = bitcast <4 x i32>* [[THIS_DST]] to i32*
; CHECK-NEXT:    [[THIS_DST_I1:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 1
; CHECK-NEXT:    [[THIS_DST_I2:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 2
; CHECK-NEXT:    [[THIS_DST_I3:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 3
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[THIS_SRC_I0]], align 16, !llvm.access.group !6
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[THIS_SRC_I1]], align 4, !llvm.access.group !6
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[THIS_SRC_I2]], align 8, !llvm.access.group !6
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[THIS_SRC_I3]], align 4, !llvm.access.group !6
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[THIS_DST_I0]], align 16, !llvm.access.group !6
; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[THIS_DST_I1]], align 4, !llvm.access.group !6
; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[THIS_DST_I2]], align 8, !llvm.access.group !6
; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[THIS_DST_I3]], align 4, !llvm.access.group !6
; CHECK-NEXT:    [[NEXT_INDEX]] = add i32 [[INDEX]], -1
; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]]
; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
  %this_src = getelementptr <4 x i32>, <4 x i32> *%src, i32 %index
  %this_dst = getelementptr <4 x i32>, <4 x i32> *%dst, i32 %index
  %val = load <4 x i32> , <4 x i32> *%this_src, !llvm.access.group !13
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.access.group !13
  %next_index = add i32 %index, -1
  %continue = icmp ne i32 %next_index, %count
  br i1 %continue, label %loop, label %end, !llvm.loop !3

end:
  ret void
}

; Check that fpmath information is preserved.
; Vector fadd with a constant vector, tagged with !fpmath !4 (node defined
; elsewhere in the file). The assertions expect four scalar fadds that each
; keep an fpmath tag, followed by an insertelement chain rebuilding the
; returned vector.
define <4 x float> @f6(<4 x float> %x) {
; CHECK-LABEL: @f6(
; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
; CHECK-NEXT:    [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9
; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <4 x float> [[X]], i32 1
; CHECK-NEXT:    [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9
; CHECK-NEXT:    [[X_I2:%.*]] = extractelement <4 x float> [[X]], i32 2
; CHECK-NEXT:    [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9
; CHECK-NEXT:    [[X_I3:%.*]] = extractelement <4 x float> [[X]], i32 3
; CHECK-NEXT:    [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9
; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i32 0
; CHECK-NEXT:    [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
; CHECK-NEXT:    [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i32 2
; CHECK-NEXT:    [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i32 3
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
  !fpmath !4
  ret <4 x float> %res
}

; Check that random metadata isn't kept.
; Vector load and store tagged with the unrecognised metadata kind !foo
; (node !5 is defined elsewhere in the file). The assertions expect the scalar
; loads and stores to carry no metadata at all.
define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f7(
; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16
; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4
; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8
; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4
; CHECK-NEXT:    ret void
;
  %val = load <4 x i32> , <4 x i32> *%src, !foo !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !foo !5
  ret void
}

; Test GEP with vectors.
; getelementptr over a vector of pointers with a vector of indices.
; Lanes 0 and 2 of the index vector are overwritten with the constant 100 and
; lane 1 of the pointer vector with %other, so the assertions expect those
; lanes to use the constant / %other directly and only the still-needed lanes
; of %ptr0 and %i0 to be extracted.
; The assertion lines sit inside the parameter list because the signature
; spans two source lines.
define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
; CHECK-LABEL: @f8(
; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float**
; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1
; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2
; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3
; CHECK-NEXT:    [[PTR0_I0:%.*]] = extractelement <4 x float*> [[PTR0:%.*]], i32 0
; CHECK-NEXT:    [[PTR0_I2:%.*]] = extractelement <4 x float*> [[PTR0]], i32 2
; CHECK-NEXT:    [[PTR0_I3:%.*]] = extractelement <4 x float*> [[PTR0]], i32 3
; CHECK-NEXT:    [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i32 1
; CHECK-NEXT:    [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i32 3
; CHECK-NEXT:    [[VAL_I0:%.*]] = getelementptr float, float* [[PTR0_I0]], i32 100
; CHECK-NEXT:    [[VAL_I1:%.*]] = getelementptr float, float* [[OTHER:%.*]], i32 [[I0_I1]]
; CHECK-NEXT:    [[VAL_I2:%.*]] = getelementptr float, float* [[PTR0_I2]], i32 100
; CHECK-NEXT:    [[VAL_I3:%.*]] = getelementptr float, float* [[PTR0_I3]], i32 [[I0_I3]]
; CHECK-NEXT:    store float* [[VAL_I0]], float** [[DEST_I0]], align 32
; CHECK-NEXT:    store float* [[VAL_I1]], float** [[DEST_I1]], align 8
; CHECK-NEXT:    store float* [[VAL_I2]], float** [[DEST_I2]], align 16
; CHECK-NEXT:    store float* [[VAL_I3]], float** [[DEST_I3]], align 8
; CHECK-NEXT:    ret void
;
  float *%other) {
  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
  %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
  %val = getelementptr float, <4 x float *> %ptr1, <4 x i32> %i2
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test the handling of unaligned loads.
; Vector load with explicit align 4 and vector store with explicit align 8,
; both below the 16-byte vector size.
; The assertions expect every scalar load to be align 4 and the scalar stores
; to alternate align 8, 4, 8, 4.
define void @f9(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f9(
; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float*
; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1
; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2
; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3
; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 4
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 4
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 4
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 4
; CHECK-NEXT:    store float [[VAL_I0]], float* [[DEST_I0]], align 8
; CHECK-NEXT:    store float [[VAL_I1]], float* [[DEST_I1]], align 4
; CHECK-NEXT:    store float [[VAL_I2]], float* [[DEST_I2]], align 8
; CHECK-NEXT:    store float [[VAL_I3]], float* [[DEST_I3]], align 4
; CHECK-NEXT:    ret void
;
  %val = load <4 x float> , <4 x float> *%src, align 4
  store <4 x float> %val, <4 x float> *%dest, align 8
  ret void
}

; ...and again with subelement alignment.
; Vector load with align 1 and vector store with align 2, i.e. alignments
; smaller than the 4-byte float element.
; The assertions expect every scalar load to stay align 1 and every scalar
; store to stay align 2.
define void @f10(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f10(
; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float*
; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1
; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2
; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3
; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 1
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 1
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 1
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 1
; CHECK-NEXT:    store float [[VAL_I0]], float* [[DEST_I0]], align 2
; CHECK-NEXT:    store float [[VAL_I1]], float* [[DEST_I1]], align 2
; CHECK-NEXT:    store float [[VAL_I2]], float* [[DEST_I2]], align 2
; CHECK-NEXT:    store float [[VAL_I3]], float* [[DEST_I3]], align 2
; CHECK-NEXT:    ret void
;
  %val = load <4 x float> , <4 x float> *%src, align 1
  store <4 x float> %val, <4 x float> *%dest, align 2
  ret void
}

; Test that sub-byte loads aren't scalarized.
427define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) { 428; CHECK-LABEL: @f11( 429; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, <32 x i1>* [[SRC0:%.*]], i32 1 430; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, <32 x i1>* [[SRC0]], align 4 431; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i32 0 432; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i32 1 433; CHECK-NEXT: [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i32 2 434; CHECK-NEXT: [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i32 3 435; CHECK-NEXT: [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i32 4 436; CHECK-NEXT: [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i32 5 437; CHECK-NEXT: [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i32 6 438; CHECK-NEXT: [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i32 7 439; CHECK-NEXT: [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i32 8 440; CHECK-NEXT: [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i32 9 441; CHECK-NEXT: [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i32 10 442; CHECK-NEXT: [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i32 11 443; CHECK-NEXT: [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i32 12 444; CHECK-NEXT: [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i32 13 445; CHECK-NEXT: [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i32 14 446; CHECK-NEXT: [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i32 15 447; CHECK-NEXT: [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i32 16 448; CHECK-NEXT: [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i32 17 449; CHECK-NEXT: [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i32 18 450; CHECK-NEXT: [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i32 19 451; CHECK-NEXT: [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i32 20 452; CHECK-NEXT: [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i32 21 453; CHECK-NEXT: [[VAL0_I22:%.*]] = extractelement <32 x 
i1> [[VAL0]], i32 22 454; CHECK-NEXT: [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i32 23 455; CHECK-NEXT: [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i32 24 456; CHECK-NEXT: [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i32 25 457; CHECK-NEXT: [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i32 26 458; CHECK-NEXT: [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i32 27 459; CHECK-NEXT: [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i32 28 460; CHECK-NEXT: [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i32 29 461; CHECK-NEXT: [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i32 30 462; CHECK-NEXT: [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i32 31 463; CHECK-NEXT: [[VAL1:%.*]] = load <32 x i1>, <32 x i1>* [[SRC1]], align 4 464; CHECK-NEXT: [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i32 0 465; CHECK-NEXT: [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]] 466; CHECK-NEXT: [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i32 1 467; CHECK-NEXT: [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]] 468; CHECK-NEXT: [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i32 2 469; CHECK-NEXT: [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]] 470; CHECK-NEXT: [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i32 3 471; CHECK-NEXT: [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]] 472; CHECK-NEXT: [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i32 4 473; CHECK-NEXT: [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]] 474; CHECK-NEXT: [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i32 5 475; CHECK-NEXT: [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]] 476; CHECK-NEXT: [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i32 6 477; CHECK-NEXT: [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]] 478; CHECK-NEXT: [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i32 7 479; CHECK-NEXT: [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]] 480; CHECK-NEXT: [[VAL1_I8:%.*]] = extractelement <32 x i1> 
[[VAL1]], i32 8 481; CHECK-NEXT: [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]] 482; CHECK-NEXT: [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i32 9 483; CHECK-NEXT: [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]] 484; CHECK-NEXT: [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i32 10 485; CHECK-NEXT: [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]] 486; CHECK-NEXT: [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i32 11 487; CHECK-NEXT: [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]] 488; CHECK-NEXT: [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i32 12 489; CHECK-NEXT: [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]] 490; CHECK-NEXT: [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i32 13 491; CHECK-NEXT: [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]] 492; CHECK-NEXT: [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i32 14 493; CHECK-NEXT: [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]] 494; CHECK-NEXT: [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i32 15 495; CHECK-NEXT: [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]] 496; CHECK-NEXT: [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i32 16 497; CHECK-NEXT: [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]] 498; CHECK-NEXT: [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i32 17 499; CHECK-NEXT: [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]] 500; CHECK-NEXT: [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i32 18 501; CHECK-NEXT: [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]] 502; CHECK-NEXT: [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i32 19 503; CHECK-NEXT: [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]] 504; CHECK-NEXT: [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i32 20 505; CHECK-NEXT: [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]] 506; CHECK-NEXT: [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i32 21 507; CHECK-NEXT: [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]] 508; 
CHECK-NEXT: [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i32 22 509; CHECK-NEXT: [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]] 510; CHECK-NEXT: [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i32 23 511; CHECK-NEXT: [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]] 512; CHECK-NEXT: [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i32 24 513; CHECK-NEXT: [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]] 514; CHECK-NEXT: [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i32 25 515; CHECK-NEXT: [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]] 516; CHECK-NEXT: [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i32 26 517; CHECK-NEXT: [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]] 518; CHECK-NEXT: [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i32 27 519; CHECK-NEXT: [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]] 520; CHECK-NEXT: [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i32 28 521; CHECK-NEXT: [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]] 522; CHECK-NEXT: [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i32 29 523; CHECK-NEXT: [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]] 524; CHECK-NEXT: [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i32 30 525; CHECK-NEXT: [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]] 526; CHECK-NEXT: [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i32 31 527; CHECK-NEXT: [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]] 528; CHECK-NEXT: [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i32 0 529; CHECK-NEXT: [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i32 1 530; CHECK-NEXT: [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i32 2 531; CHECK-NEXT: [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i32 3 532; CHECK-NEXT: [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i32 4 533; CHECK-NEXT: [[AND_UPTO5:%.*]] = insertelement <32 x 
i1> [[AND_UPTO4]], i1 [[AND_I5]], i32 5 534; CHECK-NEXT: [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i32 6 535; CHECK-NEXT: [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i32 7 536; CHECK-NEXT: [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i32 8 537; CHECK-NEXT: [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i32 9 538; CHECK-NEXT: [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i32 10 539; CHECK-NEXT: [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i32 11 540; CHECK-NEXT: [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i32 12 541; CHECK-NEXT: [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i32 13 542; CHECK-NEXT: [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i32 14 543; CHECK-NEXT: [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i32 15 544; CHECK-NEXT: [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i32 16 545; CHECK-NEXT: [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i32 17 546; CHECK-NEXT: [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i32 18 547; CHECK-NEXT: [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i32 19 548; CHECK-NEXT: [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i32 20 549; CHECK-NEXT: [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i32 21 550; CHECK-NEXT: [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i32 22 551; CHECK-NEXT: [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i32 23 552; CHECK-NEXT: [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i32 24 553; CHECK-NEXT: [[AND_UPTO25:%.*]] = insertelement <32 x 
i1> [[AND_UPTO24]], i1 [[AND_I25]], i32 25 554; CHECK-NEXT: [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i32 26 555; CHECK-NEXT: [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i32 27 556; CHECK-NEXT: [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i32 28 557; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i32 29 558; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i32 30 559; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i32 31 560; CHECK-NEXT: store <32 x i1> [[AND]], <32 x i1>* [[DEST:%.*]], align 4 561; CHECK-NEXT: ret void 562; 563 %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1 564 %val0 = load <32 x i1> , <32 x i1> *%src0 565 %val1 = load <32 x i1> , <32 x i1> *%src1 566 %and = and <32 x i1> %val0, %val1 567 store <32 x i1> %and, <32 x i1> *%dest 568 ret void 569} 570 571; Test vector GEPs with more than one index. 
; @f13: a single getelementptr over a vector of [4 x float] pointers whose
; first index is the constant vector <0, 1, 2, 3> and whose second index is the
; variable vector %i. The expected output below has one scalar GEP per lane
; (first index 0..3, second index extracted from %i) and four scalar pointer
; stores through per-lane addresses derived from %dest.
; Note: the parameter list is continued after the autogenerated assertion
; block ("float *%other) {"); the assertions are IR comments, so the definition
; still parses. %other is not used in the body.
define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
; CHECK-LABEL: @f13(
; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float**
; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3
; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i32 0
; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x [4 x float]*> [[PTR:%.*]], i32 0
; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I0]], i32 0, i32 [[I_I0]]
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i32 1
; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I1]], i32 1, i32 [[I_I1]]
; CHECK-NEXT: [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i32 2
; CHECK-NEXT: [[PTR_I2:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 2
; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I2]], i32 2, i32 [[I_I2]]
; CHECK-NEXT: [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i32 3
; CHECK-NEXT: [[PTR_I3:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 3
; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I3]], i32 3, i32 [[I_I3]]
; CHECK-NEXT: store float* [[VAL_I0]], float** [[DEST_I0]], align 32
; CHECK-NEXT: store float* [[VAL_I1]], float** [[DEST_I1]], align 8
; CHECK-NEXT: store float* [[VAL_I2]], float** [[DEST_I2]], align 16
; CHECK-NEXT: store float* [[VAL_I3]], float** [[DEST_I3]], align 8
; CHECK-NEXT: ret void
;
  float *%other) {
  %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr,
  <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
  <4 x i32> %i
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test combinations of vector and non-vector PHIs.
; The loop carries a <4 x float> PHI (%this_acc) next to a scalar i32 PHI
; (%this_count). The expected output splits the vector PHI into four float
; PHIs, keeps the i32 PHI as is, reassembles the vector with an insertelement
; chain for the call to @ext, and reassembles it again for the returned value.
define <4 x float> @f14(<4 x float> %acc, i32 %count) {
; CHECK-LABEL: @f14(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i32 0
; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i32 1
; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i32 2
; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i32 3
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i32 0
; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i32 1
; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i32 2
; CHECK-NEXT: [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i32 3
; CHECK-NEXT: [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]])
; CHECK-NEXT: [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i32 0
; CHECK-NEXT: [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]]
; CHECK-NEXT: [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i32 1
; CHECK-NEXT: [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]]
; CHECK-NEXT: [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i32 2
; CHECK-NEXT: [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]]
; CHECK-NEXT: [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i32 3
; CHECK-NEXT: [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]]
; CHECK-NEXT: [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i32 0
; CHECK-NEXT: [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i32 1
; CHECK-NEXT: [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i32 2
; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i32 3
; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret <4 x float> [[NEXT_ACC]]
;
entry:
  br label %loop

loop:
  %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
  %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
  %foo = call <4 x float> @ext(<4 x float> %this_acc)
  %next_acc = fadd <4 x float> %this_acc, %foo
  %next_count = sub i32 %this_count, 1
  %cmp = icmp eq i32 %next_count, 0
  br i1 %cmp, label %loop, label %exit

exit:
  ret <4 x float> %next_acc
}

; Test unary operator scalarization.
; @f15: each loop iteration loads a <4 x float> from %base[%i], negates it with
; a vector fneg, passes the result to @ext, then compares/selects against
; constant vectors and stores back to the same address. The expected output
; has four scalar loads, four scalar fneg instructions, a rebuilt vector for
; the call to @ext, and per-lane fcmp/select/store.
; The %acc PHI has no users in the body, and no PHI for it (nor any extract of
; %init) appears in the expected output.
define void @f15(<4 x float> %init, <4 x float> *%base, i32 %count) {
; CHECK-LABEL: @f15(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]]
; CHECK-NEXT: [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float*
; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16
; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4
; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2
; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8
; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3
; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4
; CHECK-NEXT: [[NEG_I0:%.*]] = fneg float [[VAL_I0]]
; CHECK-NEXT: [[NEG_I1:%.*]] = fneg float [[VAL_I1]]
; CHECK-NEXT: [[NEG_I2:%.*]] = fneg float [[VAL_I2]]
; CHECK-NEXT: [[NEG_I3:%.*]] = fneg float [[VAL_I3]]
; CHECK-NEXT: [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i32 0
; CHECK-NEXT: [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i32 1
; CHECK-NEXT: [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i32 2
; CHECK-NEXT: [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i32 3
; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]])
; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0
; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1
; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2
; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3
; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
; CHECK-NEXT: [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
; CHECK-NEXT: [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
; CHECK-NEXT: [[SEL_I2:%.*]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
; CHECK-NEXT: [[SEL_I3:%.*]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
; CHECK-NEXT: store float [[SEL_I0]], float* [[PTR_I0]], align 16
; CHECK-NEXT: store float [[SEL_I1]], float* [[PTR_I1]], align 4
; CHECK-NEXT: store float [[SEL_I2]], float* [[PTR_I2]], align 8
; CHECK-NEXT: store float [[SEL_I3]], float* [[PTR_I3]], align 4
; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
  %val = load <4 x float> , <4 x float> *%ptr
  %neg = fneg <4 x float> %val
  %call = call <4 x float> @ext(<4 x float> %neg)
  %cmp = fcmp ogt <4 x float> %call,
  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
  <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, <4 x float> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that IR flags are preserved.
; @f16: the nuw and nsw wrap flags on a vector add must appear on each scalar add.
define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
; CHECK-LABEL: @f16(
; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]]
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
; CHECK-NEXT: [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]]
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0
; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
; CHECK-NEXT: ret <2 x i32> [[RES]]
;
  %res = add nuw nsw <2 x i32> %i, %j
  ret <2 x i32> %res
}
; @f17: the exact flag on a vector sdiv must appear on each scalar sdiv.
define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
; CHECK-LABEL: @f17(
; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]]
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
; CHECK-NEXT: [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]]
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0
; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
; CHECK-NEXT: ret <2 x i32> [[RES]]
;
  %res = sdiv exact <2 x i32> %i, %j
  ret <2 x i32> %res
}
; @f18: the fast flag on a vector fadd must appear on each scalar fadd.
define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @f18(
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
; CHECK-NEXT: [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
; CHECK-NEXT: [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]]
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
; CHECK-NEXT: ret <2 x float> [[RES]]
;
  %res = fadd fast <2 x float> %x, %y
  ret <2 x float> %res
}
; @f19: the fast flag on a vector fneg (unary operator) must appear on each scalar fneg.
define <2 x float> @f19(<2 x float> %x) {
; CHECK-LABEL: @f19(
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]]
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
; CHECK-NEXT: ret <2 x float> [[RES]]
;
  %res = fneg fast <2 x float> %x
  ret <2 x float> %res
}
; @f20: the fast flag on a vector fcmp must appear on each scalar fcmp.
define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @f20(
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
; CHECK-NEXT: [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]]
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i32 0
; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i32 1
; CHECK-NEXT: ret <2 x i1> [[RES]]
;
  %res = fcmp fast ogt <2 x float> %x, %y
  ret <2 x i1> %res
}
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
; @f21: a fast call to the vector llvm.sqrt intrinsic is expected to become
; per-lane fast calls to the scalar @llvm.sqrt.f32.
define <2 x float> @f21(<2 x float> %x) {
; CHECK-LABEL: @f21(
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]])
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]])
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
; CHECK-NEXT: ret <2 x float> [[RES]]
;
  %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  ret <2 x float> %res
}
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
; @f22: same as @f21 but for a three-operand intrinsic; a fast call to the
; vector llvm.fma is expected to become per-lane fast calls to @llvm.fma.f32.
define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
; CHECK-LABEL: @f22(
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i32 0
; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]])
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i32 1
; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]])
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
; CHECK-NEXT: ret <2 x float> [[RES]]
;
  %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
  ret <2 x float> %res
}

; See https://reviews.llvm.org/D83101#2133062
; @f23_crash: an insertelement chain that starts from undef and is fed by an
; extractelement of %srcvec (lane 0) and the scalar argument %v1 (lane 1).
; The expected output keeps the single extract and rebuilds the result with an
; insertelement chain that starts from poison.
define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
; CHECK-LABEL: @f23_crash(
; CHECK-NEXT: [[V0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i32 0
; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[V0]], i32 0
; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i32 1
; CHECK-NEXT: ret <2 x i32> [[T1]]
;
  %v0 = extractelement <2 x i32> %srcvec, i32 0
  %t0 = insertelement <2 x i32> undef, i32 %v0, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1
  ret <2 x i32> %t1
}

; Metadata nodes. None of @f16 through @f23_crash reference them; presumably
; they are attached to instructions in tests earlier in the file -- verify
; before changing or removing any of them.
!0 = !{ !"root" }
!1 = !{ !"set1", !0 }
!2 = !{ !"set2", !0 }
!3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
!4 = !{ float 4.0 }
!5 = !{ i64 0, i64 8, null }
!13 = distinct !{}