; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=vector-combine -data-layout=e < %s | FileCheck %s
; RUN: opt -S -passes=vector-combine -data-layout=E < %s | FileCheck %s

define void @insert_store(<16 x i8>* %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 3
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
  store <16 x i8> %vecins, <16 x i8>* %q, align 16
  ret void
}

define void @insert_store_i16_align1(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_i16_align1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[Q:%.*]], i32 0, i32 3
; CHECK-NEXT:    store i16 [[S:%.*]], i16* [[TMP0]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
  store <8 x i16> %vecins, <8 x i16>* %q, align 1
  ret void
}

; To verify case when index is out of bounds
define void @insert_store_outofbounds(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_outofbounds(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 9
  store <8 x i16> %vecins, <8 x i16>* %q
  ret void
}

define void @insert_store_vscale(<vscale x 8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_vscale(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x i16>, <vscale x 8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
; CHECK-NEXT:    store <vscale x 8 x i16> [[VECINS]], <vscale x 8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <vscale x 8 x i16>, <vscale x 8 x i16>* %q
  %vecins = insertelement <vscale x 8 x i16> %0, i16 %s, i32 3
  store <vscale x 8 x i16> %vecins, <vscale x 8 x i16>* %q
  ret void
}

define void @insert_store_v9i4(<9 x i4>* %q, i4 zeroext %s) {
; CHECK-LABEL: @insert_store_v9i4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <9 x i4>, <9 x i4>* [[Q:%.*]], align 8
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <9 x i4> [[TMP0]], i4 [[S:%.*]], i32 3
; CHECK-NEXT:    store <9 x i4> [[VECINS]], <9 x i4>* [[Q]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <9 x i4>, <9 x i4>* %q
  %vecins = insertelement <9 x i4> %0, i4 %s, i32 3
  store <9 x i4> %vecins, <9 x i4>* %q, align 1
  ret void
}

define void @insert_store_v4i27(<4 x i27>* %q, i27 zeroext %s) {
; CHECK-LABEL: @insert_store_v4i27(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i27>, <4 x i27>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x i27> [[TMP0]], i27 [[S:%.*]], i32 3
; CHECK-NEXT:    store <4 x i27> [[VECINS]], <4 x i27>* [[Q]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <4 x i27>, <4 x i27>* %q
  %vecins = insertelement <4 x i27> %0, i27 %s, i32 3
  store <4 x i27> %vecins, <4 x i27>* %q, align 1
  ret void
}

define void @insert_store_blk_differ(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_blk_differ(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    br label [[CONT:%.*]]
; CHECK:       cont:
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  br label %cont
cont:
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
  store <8 x i16> %vecins, <8 x i16>* %q
  ret void
}

define void @insert_store_nonconst(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

; To verify align here is narrowed to scalar store size
define void @insert_store_nonconst_large_alignment(<4 x i32>* %q, i32 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_large_alignment(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i32 [[S:%.*]], i32* [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %i = load <4 x i32>, <4 x i32>* %q, align 128
  %vecins = insertelement <4 x i32> %i, i32 %s, i32 %idx
  store <4 x i32> %vecins, <4 x i32>* %q, align 128
  ret void
}

define void @insert_store_nonconst_align_maximum_8(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_maximum_8(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 8
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 8
  ret void
}

define void @insert_store_nonconst_align_maximum_4(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_maximum_4(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 4
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 4
  ret void
}

define void @insert_store_nonconst_align_larger(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_larger(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 4
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 2
  ret void
}

define void @insert_store_nonconst_index_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

declare void @maythrow() readnone

define void @insert_store_nonconst_index_not_known_valid_by_assume_after_load(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume_after_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    call void @maythrow()
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  %0 = load <16 x i8>, <16 x i8>* %q
  call void @maythrow()
  call void @llvm.assume(i1 %cmp)
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_not_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 17
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 17
  call void @llvm.assume(i1 %cmp)
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

declare void @llvm.assume(i1)

define void @insert_store_nonconst_index_known_noundef_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 7
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_base_frozen_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX_FROZEN]], 7
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.frozen = freeze i32 %idx
  %idx.clamped = and i32 %idx.frozen, 7
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_frozen_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
; CHECK-NEXT:    [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 7
  %idx.clamped.frozen = freeze i32 %idx.clamped
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped.frozen
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[TMP0]], 7
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP1]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 7
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_known_noundef_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_base_frozen_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX_FROZEN]], 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.frozen = freeze i32 %idx
  %idx.clamped = urem i32 %idx.frozen, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_frozen_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 16
  %idx.clamped.frozen = freeze i32 %idx.clamped
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped.frozen
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[TMP0]], 16
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP1]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 17
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 17
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store_ptr_strip(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[ADDR0]], i64 0
; CHECK-NEXT:    [[ADDR2:%.*]] = bitcast <2 x i64>* [[ADDR1]] to <16 x i8>*
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[ADDR2]], i32 0, i32 3
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
  %addr0 = bitcast <16 x i8>* %q to <2 x i64>*
  %addr1 = getelementptr <2 x i64>, <2 x i64>* %addr0, i64 0
  %addr2 = bitcast <2 x i64>* %addr1 to <16 x i8>*
  store <16 x i8> %vecins, <16 x i8>* %addr2
  ret void
}

define void @volatile_update(<16 x i8>* %q, <16 x i8>* %p, i8 zeroext %s) {
; CHECK-LABEL: @volatile_update(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store volatile <16 x i8> [[VECINS0]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load volatile <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[S]], i32 1
; CHECK-NEXT:    store <16 x i8> [[VECINS1]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins0 = insertelement <16 x i8> %0, i8 %s, i32 3
  store volatile <16 x i8> %vecins0, <16 x i8>* %q

  %1 = load volatile <16 x i8>, <16 x i8>* %p
  %vecins1 = insertelement <16 x i8> %1, i8 %s, i32 1
  store <16 x i8> %vecins1, <16 x i8>* %p
  ret void
}

define void @insert_store_addr_differ(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
; CHECK-LABEL: @insert_store_addr_differ(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %ld = load <16 x i8>, <16 x i8>* %p
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %q
  ret void
}

; We can't transform if any instr could modify memory in between.
define void @insert_store_mem_modify(<16 x i8>* %p, <16 x i8>* %q, <16 x i8>* noalias %r, i8 %s, i32 %m) {
; CHECK-LABEL: @insert_store_mem_modify(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[R:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 7
; CHECK-NEXT:    store i8 [[S]], i8* [[TMP0]], align 1
; CHECK-NEXT:    [[PTR0:%.*]] = bitcast <16 x i8>* [[P]] to <4 x i32>*
; CHECK-NEXT:    [[LD3:%.*]] = load <4 x i32>, <4 x i32>* [[PTR0]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[P]], align 16
; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x i32> [[LD3]], i32 [[M:%.*]], i32 0
; CHECK-NEXT:    store <4 x i32> [[INS3]], <4 x i32>* [[PTR0]], align 16
; CHECK-NEXT:    ret void
;
entry:
  ; p may alias q
  %ld = load <16 x i8>, <16 x i8>* %p
  store <16 x i8> zeroinitializer, <16 x i8>* %q
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %p

  ; p never aliases r
  %ld2 = load <16 x i8>, <16 x i8>* %q
  store <16 x i8> zeroinitializer, <16 x i8>* %r
  %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
  store <16 x i8> %ins2, <16 x i8>* %q

  ; p must alias ptr0
  %ptr0 = bitcast <16 x i8>* %p to <4 x i32>*
  %ld3 = load <4 x i32>, <4 x i32>* %ptr0
  store <16 x i8> zeroinitializer, <16 x i8>* %p
  %ins3 = insertelement <4 x i32> %ld3, i32 %m, i32 0
  store <4 x i32> %ins3, <4 x i32>* %ptr0

  ret void
}

; Check cases when calls may modify memory
define void @insert_store_with_call(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
; CHECK-LABEL: @insert_store_with_call(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    call void @maywrite(<16 x i8>* [[P]])
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    call void @nowrite(<16 x i8>* [[P]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P]], i32 0, i32 7
; CHECK-NEXT:    store i8 [[S]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %ld = load <16 x i8>, <16 x i8>* %p
  call void @maywrite(<16 x i8>* %p)
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %p
  call void @foo() ; Barrier
  %ld2 = load <16 x i8>, <16 x i8>* %p
  call void @nowrite(<16 x i8>* %p)
  %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
  store <16 x i8> %ins2, <16 x i8>* %p
  ret void
}

declare void @foo()
declare void @maywrite(<16 x i8>*)
declare void @nowrite(<16 x i8>*) readonly

; To test if number of instructions in-between exceeds the limit (default 30),
; the combine will quit.
define i32 @insert_store_maximum_scan_instrs(i32 %arg, i16* %arg1, <16 x i8>* %arg2, i8 zeroext %arg3) {
; CHECK-LABEL: @insert_store_maximum_scan_instrs(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[I:%.*]] = or i32 [[ARG:%.*]], 1
; CHECK-NEXT:    [[I4:%.*]] = load <16 x i8>, <16 x i8>* [[ARG2:%.*]], align 16
; CHECK-NEXT:    [[I5:%.*]] = tail call i32 @bar(i32 [[I]], i1 true)
; CHECK-NEXT:    [[I6:%.*]] = shl i32 [[ARG]], [[I5]]
; CHECK-NEXT:    [[I7:%.*]] = lshr i32 [[I6]], 26
; CHECK-NEXT:    [[I8:%.*]] = trunc i32 [[I7]] to i8
; CHECK-NEXT:    [[I9:%.*]] = and i8 [[I8]], 31
; CHECK-NEXT:    [[I10:%.*]] = lshr i32 [[I6]], 11
; CHECK-NEXT:    [[I11:%.*]] = and i32 [[I10]], 32767
; CHECK-NEXT:    [[I12:%.*]] = zext i8 [[I9]] to i64
; CHECK-NEXT:    [[I13:%.*]] = getelementptr inbounds i16, i16* [[ARG1:%.*]], i64 [[I12]]
; CHECK-NEXT:    [[I14:%.*]] = load i16, i16* [[I13]], align 2
; CHECK-NEXT:    [[I15:%.*]] = zext i16 [[I14]] to i32
; CHECK-NEXT:    [[I16:%.*]] = add nuw nsw i8 [[I9]], 1
; CHECK-NEXT:    [[I17:%.*]] = zext i8 [[I16]] to i64
; CHECK-NEXT:    [[I18:%.*]] = getelementptr inbounds i16, i16* [[ARG1]], i64 [[I17]]
; CHECK-NEXT:    [[I19:%.*]] = load i16, i16* [[I18]], align 2
; CHECK-NEXT:    [[I20:%.*]] = zext i16 [[I19]] to i32
; CHECK-NEXT:    [[I21:%.*]] = sub nsw i32 [[I20]], [[I15]]
; CHECK-NEXT:    [[I22:%.*]] = mul nsw i32 [[I11]], [[I21]]
; CHECK-NEXT:    [[I23:%.*]] = ashr i32 [[I22]], 15
; CHECK-NEXT:    [[I24:%.*]] = shl nuw nsw i32 [[I5]], 15
; CHECK-NEXT:    [[I25:%.*]] = xor i32 [[I24]], 1015808
; CHECK-NEXT:    [[I26:%.*]] = add nuw nsw i32 [[I25]], [[I15]]
; CHECK-NEXT:    [[I27:%.*]] = add nsw i32 [[I26]], [[I23]]
; CHECK-NEXT:    [[I28:%.*]] = sitofp i32 [[ARG]] to double
; CHECK-NEXT:    [[I29:%.*]] = tail call double @llvm.log2.f64(double [[I28]])
; CHECK-NEXT:    [[I30:%.*]] = fptosi double [[I29]] to i32
; CHECK-NEXT:    [[I31:%.*]] = shl nsw i32 [[I30]], 15
; CHECK-NEXT:    [[I32:%.*]] = or i32 [[I31]], 4
; CHECK-NEXT:    [[I33:%.*]] = icmp eq i32 [[I27]], [[I32]]
; CHECK-NEXT:    [[I34:%.*]] = select i1 [[I33]], i32 [[ARG]], i32 [[I31]]
; CHECK-NEXT:    [[I35:%.*]] = lshr i32 [[I34]], 1
; CHECK-NEXT:    [[I36:%.*]] = insertelement <16 x i8> [[I4]], i8 [[ARG3:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[I36]], <16 x i8>* [[ARG2]], align 16
; CHECK-NEXT:    ret i32 [[I35]]
;
bb:
  %i = or i32 %arg, 1
  %i4 = load <16 x i8>, <16 x i8>* %arg2, align 16
  %i5 = tail call i32 @bar(i32 %i, i1 true)
  %i6 = shl i32 %arg, %i5
  %i7 = lshr i32 %i6, 26
  %i8 = trunc i32 %i7 to i8
  %i9 = and i8 %i8, 31
  %i10 = lshr i32 %i6, 11
  %i11 = and i32 %i10, 32767
  %i12 = zext i8 %i9 to i64
  %i13 = getelementptr inbounds i16, i16* %arg1, i64 %i12
  %i14 = load i16, i16* %i13, align 2
  %i15 = zext i16 %i14 to i32
  %i16 = add nuw nsw i8 %i9, 1
  %i17 = zext i8 %i16 to i64
  %i18 = getelementptr inbounds i16, i16* %arg1, i64 %i17
  %i19 = load i16, i16* %i18, align 2
  %i20 = zext i16 %i19 to i32
  %i21 = sub nsw i32 %i20, %i15
  %i22 = mul nsw i32 %i11, %i21
  %i23 = ashr i32 %i22, 15
  %i24 = shl nuw nsw i32 %i5, 15
  %i25 = xor i32 %i24, 1015808
  %i26 = add nuw nsw i32 %i25, %i15
  %i27 = add nsw i32 %i26, %i23
  %i28 = sitofp i32 %arg to double
  %i29 = tail call double @llvm.log2.f64(double %i28)
  %i30 = fptosi double %i29 to i32
  %i31 = shl nsw i32 %i30, 15
  %i32 = or i32 %i31, 4
  %i33 = icmp eq i32 %i27, %i32
  %i34 = select i1 %i33, i32 %arg, i32 %i31
  %i35 = lshr i32 %i34, 1
  %i36 = insertelement <16 x i8> %i4, i8 %arg3, i32 3
  store <16 x i8> %i36, <16 x i8>* %arg2, align 16
  ret i32 %i35
}

declare i32 @bar(i32, i1) readonly
declare double @llvm.log2.f64(double)