; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -vector-combine -data-layout=e < %s | FileCheck %s
; RUN: opt -S -vector-combine -data-layout=E < %s | FileCheck %s

define void @insert_store(<16 x i8>* %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 3
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
  store <16 x i8> %vecins, <16 x i8>* %q, align 16
  ret void
}

define void @insert_store_i16_align1(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_i16_align1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[Q:%.*]], i32 0, i32 3
; CHECK-NEXT:    store i16 [[S:%.*]], i16* [[TMP0]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
  store <8 x i16> %vecins, <8 x i16>* %q, align 1
  ret void
}

; To verify case when index is out of bounds
define void @insert_store_outofbounds(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_outofbounds(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 9
  store <8 x i16> %vecins, <8 x i16>* %q
  ret void
}

define void @insert_store_vscale(<vscale x 8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_vscale(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x i16>, <vscale x 8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
; CHECK-NEXT:    store <vscale x 8 x i16> [[VECINS]], <vscale x 8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <vscale x 8 x i16>, <vscale x 8 x i16>* %q
  %vecins = insertelement <vscale x 8 x i16> %0, i16 %s, i32 3
  store <vscale x 8 x i16> %vecins, <vscale x 8 x i16>* %q
  ret void
}

define void @insert_store_v9i4(<9 x i4>* %q, i4 zeroext %s) {
; CHECK-LABEL: @insert_store_v9i4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <9 x i4>, <9 x i4>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <9 x i4> [[TMP0]], i4 [[S:%.*]], i32 3
; CHECK-NEXT:    store <9 x i4> [[VECINS]], <9 x i4>* [[Q]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <9 x i4>, <9 x i4>* %q
  %vecins = insertelement <9 x i4> %0, i4 %s, i32 3
  store <9 x i4> %vecins, <9 x i4>* %q, align 1
  ret void
}

define void @insert_store_v4i27(<4 x i27>* %q, i27 zeroext %s) {
; CHECK-LABEL: @insert_store_v4i27(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i27>, <4 x i27>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x i27> [[TMP0]], i27 [[S:%.*]], i32 3
; CHECK-NEXT:    store <4 x i27> [[VECINS]], <4 x i27>* [[Q]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <4 x i27>, <4 x i27>* %q
  %vecins = insertelement <4 x i27> %0, i27 %s, i32 3
  store <4 x i27> %vecins, <4 x i27>* %q, align 1
  ret void
}

define void @insert_store_blk_differ(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_blk_differ(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    br label [[CONT:%.*]]
; CHECK:       cont:
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  br label %cont
cont:
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
  store <8 x i16> %vecins, <8 x i16>* %q
  ret void
}

define void @insert_store_nonconst(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

; To verify align here is narrowed to scalar store size
define void @insert_store_nonconst_large_alignment(<4 x i32>* %q, i32 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_large_alignment(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i32 [[S:%.*]], i32* [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %i = load <4 x i32>, <4 x i32>* %q, align 128
  %vecins = insertelement <4 x i32> %i, i32 %s, i32 %idx
  store <4 x i32> %vecins, <4 x i32>* %q, align 128
  ret void
}

define void @insert_store_nonconst_align_maximum_8(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_maximum_8(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 8
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 8
  ret void
}

define void @insert_store_nonconst_align_maximum_4(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_maximum_4(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 4
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 4
  ret void
}

define void @insert_store_nonconst_align_larger(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_larger(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 4
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 2
  ret void
}

define void @insert_store_nonconst_index_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

declare void @maythrow() readnone

define void @insert_store_nonconst_index_not_known_valid_by_assume_after_load(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume_after_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    call void @maythrow()
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  %0 = load <16 x i8>, <16 x i8>* %q
  call void @maythrow()
  call void @llvm.assume(i1 %cmp)
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_not_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 17
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 17
  call void @llvm.assume(i1 %cmp)
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

declare void @llvm.assume(i1)

define void @insert_store_nonconst_index_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 7
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 17
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store_ptr_strip(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 3
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
  %addr0 = bitcast <16 x i8>* %q to <2 x i64>*
  %addr1 = getelementptr <2 x i64>, <2 x i64>* %addr0, i64 0
  %addr2 = bitcast <2 x i64>* %addr1 to <16 x i8>*
  store <16 x i8> %vecins, <16 x i8>* %addr2
  ret void
}

define void @volatile_update(<16 x i8>* %q, <16 x i8>* %p, i8 zeroext %s) {
; CHECK-LABEL: @volatile_update(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store volatile <16 x i8> [[VECINS0]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load volatile <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[S]], i32 1
; CHECK-NEXT:    store <16 x i8> [[VECINS1]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins0 = insertelement <16 x i8> %0, i8 %s, i32 3
  store volatile <16 x i8> %vecins0, <16 x i8>* %q

  %1 = load volatile <16 x i8>, <16 x i8>* %p
  %vecins1 = insertelement <16 x i8> %1, i8 %s, i32 1
  store <16 x i8> %vecins1, <16 x i8>* %p
  ret void
}

define void @insert_store_addr_differ(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
; CHECK-LABEL: @insert_store_addr_differ(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %ld = load <16 x i8>, <16 x i8>* %p
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %q
  ret void
}

; We can't transform if any instr could modify memory in between.
define void @insert_store_mem_modify(<16 x i8>* %p, <16 x i8>* %q, <16 x i8>* noalias %r, i8 %s, i32 %m) {
; CHECK-LABEL: @insert_store_mem_modify(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[R:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 7
; CHECK-NEXT:    store i8 [[S]], i8* [[TMP0]], align 1
; CHECK-NEXT:    [[PTR0:%.*]] = bitcast <16 x i8>* [[P]] to <4 x i32>*
; CHECK-NEXT:    [[LD3:%.*]] = load <4 x i32>, <4 x i32>* [[PTR0]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[P]], align 16
; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x i32> [[LD3]], i32 [[M:%.*]], i32 0
; CHECK-NEXT:    store <4 x i32> [[INS3]], <4 x i32>* [[PTR0]], align 16
; CHECK-NEXT:    ret void
;
entry:
  ; p may alias q
  %ld = load <16 x i8>, <16 x i8>* %p
  store <16 x i8> zeroinitializer, <16 x i8>* %q
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %p

  ; p never aliases r
  %ld2 = load <16 x i8>, <16 x i8>* %q
  store <16 x i8> zeroinitializer, <16 x i8>* %r
  %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
  store <16 x i8> %ins2, <16 x i8>* %q

  ; p must alias ptr0
  %ptr0 = bitcast <16 x i8>* %p to <4 x i32>*
  %ld3 = load <4 x i32>, <4 x i32>* %ptr0
  store <16 x i8> zeroinitializer, <16 x i8>* %p
  %ins3 = insertelement <4 x i32> %ld3, i32 %m, i32 0
  store <4 x i32> %ins3, <4 x i32>* %ptr0

  ret void
}

; Check cases when calls may modify memory
define void @insert_store_with_call(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
; CHECK-LABEL: @insert_store_with_call(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    call void @maywrite(<16 x i8>* [[P]])
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    call void @nowrite(<16 x i8>* [[P]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P]], i32 0, i32 7
; CHECK-NEXT:    store i8 [[S]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %ld = load <16 x i8>, <16 x i8>* %p
  call void @maywrite(<16 x i8>* %p)
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %p
  call void @foo() ; Barrier
  %ld2 = load <16 x i8>, <16 x i8>* %p
  call void @nowrite(<16 x i8>* %p)
  %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
  store <16 x i8> %ins2, <16 x i8>* %p
  ret void
}

declare void @foo()
declare void @maywrite(<16 x i8>*)
declare void @nowrite(<16 x i8>*) readonly

; To test if number of instructions in-between exceeds the limit (default 30),
; the combine will quit.
define i32 @insert_store_maximum_scan_instrs(i32 %arg, i16* %arg1, <16 x i8>* %arg2, i8 zeroext %arg3) {
; CHECK-LABEL: @insert_store_maximum_scan_instrs(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[I:%.*]] = or i32 [[ARG:%.*]], 1
; CHECK-NEXT:    [[I4:%.*]] = load <16 x i8>, <16 x i8>* [[ARG2:%.*]], align 16
; CHECK-NEXT:    [[I5:%.*]] = tail call i32 @bar(i32 [[I]], i1 true)
; CHECK-NEXT:    [[I6:%.*]] = shl i32 [[ARG]], [[I5]]
; CHECK-NEXT:    [[I7:%.*]] = lshr i32 [[I6]], 26
; CHECK-NEXT:    [[I8:%.*]] = trunc i32 [[I7]] to i8
; CHECK-NEXT:    [[I9:%.*]] = and i8 [[I8]], 31
; CHECK-NEXT:    [[I10:%.*]] = lshr i32 [[I6]], 11
; CHECK-NEXT:    [[I11:%.*]] = and i32 [[I10]], 32767
; CHECK-NEXT:    [[I12:%.*]] = zext i8 [[I9]] to i64
; CHECK-NEXT:    [[I13:%.*]] = getelementptr inbounds i16, i16* [[ARG1:%.*]], i64 [[I12]]
; CHECK-NEXT:    [[I14:%.*]] = load i16, i16* [[I13]], align 2
; CHECK-NEXT:    [[I15:%.*]] = zext i16 [[I14]] to i32
; CHECK-NEXT:    [[I16:%.*]] = add nuw nsw i8 [[I9]], 1
; CHECK-NEXT:    [[I17:%.*]] = zext i8 [[I16]] to i64
; CHECK-NEXT:    [[I18:%.*]] = getelementptr inbounds i16, i16* [[ARG1]], i64 [[I17]]
; CHECK-NEXT:    [[I19:%.*]] = load i16, i16* [[I18]], align 2
; CHECK-NEXT:    [[I20:%.*]] = zext i16 [[I19]] to i32
; CHECK-NEXT:    [[I21:%.*]] = sub nsw i32 [[I20]], [[I15]]
; CHECK-NEXT:    [[I22:%.*]] = mul nsw i32 [[I11]], [[I21]]
; CHECK-NEXT:    [[I23:%.*]] = ashr i32 [[I22]], 15
; CHECK-NEXT:    [[I24:%.*]] = shl nuw nsw i32 [[I5]], 15
; CHECK-NEXT:    [[I25:%.*]] = xor i32 [[I24]], 1015808
; CHECK-NEXT:    [[I26:%.*]] = add nuw nsw i32 [[I25]], [[I15]]
; CHECK-NEXT:    [[I27:%.*]] = add nsw i32 [[I26]], [[I23]]
; CHECK-NEXT:    [[I28:%.*]] = sitofp i32 [[ARG]] to double
; CHECK-NEXT:    [[I29:%.*]] = tail call double @llvm.log2.f64(double [[I28]])
; CHECK-NEXT:    [[I30:%.*]] = fptosi double [[I29]] to i32
; CHECK-NEXT:    [[I31:%.*]] = shl nsw i32 [[I30]], 15
; CHECK-NEXT:    [[I32:%.*]] = or i32 [[I31]], 4
; CHECK-NEXT:    [[I33:%.*]] = icmp eq i32 [[I27]], [[I32]]
; CHECK-NEXT:    [[I34:%.*]] = select i1 [[I33]], i32 [[ARG]], i32 [[I31]]
; CHECK-NEXT:    [[I35:%.*]] = lshr i32 [[I34]], 1
; CHECK-NEXT:    [[I36:%.*]] = insertelement <16 x i8> [[I4]], i8 [[ARG3:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[I36]], <16 x i8>* [[ARG2]], align 16
; CHECK-NEXT:    ret i32 [[I35]]
;
bb:
  %i = or i32 %arg, 1
  %i4 = load <16 x i8>, <16 x i8>* %arg2, align 16
  %i5 = tail call i32 @bar(i32 %i, i1 true)
  %i6 = shl i32 %arg, %i5
  %i7 = lshr i32 %i6, 26
  %i8 = trunc i32 %i7 to i8
  %i9 = and i8 %i8, 31
  %i10 = lshr i32 %i6, 11
  %i11 = and i32 %i10, 32767
  %i12 = zext i8 %i9 to i64
  %i13 = getelementptr inbounds i16, i16* %arg1, i64 %i12
  %i14 = load i16, i16* %i13, align 2
  %i15 = zext i16 %i14 to i32
  %i16 = add nuw nsw i8 %i9, 1
  %i17 = zext i8 %i16 to i64
  %i18 = getelementptr inbounds i16, i16* %arg1, i64 %i17
  %i19 = load i16, i16* %i18, align 2
  %i20 = zext i16 %i19 to i32
  %i21 = sub nsw i32 %i20, %i15
  %i22 = mul nsw i32 %i11, %i21
  %i23 = ashr i32 %i22, 15
  %i24 = shl nuw nsw i32 %i5, 15
  %i25 = xor i32 %i24, 1015808
  %i26 = add nuw nsw i32 %i25, %i15
  %i27 = add nsw i32 %i26, %i23
  %i28 = sitofp i32 %arg to double
  %i29 = tail call double @llvm.log2.f64(double %i28)
  %i30 = fptosi double %i29 to i32
  %i31 = shl nsw i32 %i30, 15
  %i32 = or i32 %i31, 4
  %i33 = icmp eq i32 %i27, %i32
  %i34 = select i1 %i33, i32 %arg, i32 %i31
  %i35 = lshr i32 %i34, 1
  %i36 = insertelement <16 x i8> %i4, i8 %arg3, i32 3
  store <16 x i8> %i36, <16 x i8>* %arg2, align 16
  ret i32 %i35
}

declare i32 @bar(i32, i1) readonly
declare double @llvm.log2.f64(double)