1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -passes=vector-combine -data-layout=e < %s | FileCheck %s
3; RUN: opt -S -passes=vector-combine -data-layout=E < %s | FileCheck %s
4
; Baseline positive test: a load / insertelement (constant in-bounds index) /
; store round-trip is scalarized to a single element store (GEP + store i8).
define void @insert_store(<16 x i8>* %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 3
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
  store <16 x i8> %vecins, <16 x i8>* %q, align 16
  ret void
}
18
; Same scalarization with i16 elements and an under-aligned (align 1) vector
; store; the scalar store is emitted with align 2 (see the second CHECK-NEXT).
define void @insert_store_i16_align1(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_i16_align1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[Q:%.*]], i32 0, i32 3
; CHECK-NEXT:    store i16 [[S:%.*]], i16* [[TMP0]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
  store <8 x i16> %vecins, <8 x i16>* %q, align 1
  ret void
}
32
; Negative test: the insert index (9) is out of bounds for <8 x i16>, so the
; transform must not fire and the vector load/insert/store sequence is kept.
define void @insert_store_outofbounds(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_outofbounds(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 9
  store <8 x i16> %vecins, <8 x i16>* %q
  ret void
}
48
; Negative test: scalable vectors are not scalarized; the sequence is unchanged.
define void @insert_store_vscale(<vscale x 8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_vscale(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x i16>, <vscale x 8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
; CHECK-NEXT:    store <vscale x 8 x i16> [[VECINS]], <vscale x 8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <vscale x 8 x i16>, <vscale x 8 x i16>* %q
  %vecins = insertelement <vscale x 8 x i16> %0, i16 %s, i32 3
  store <vscale x 8 x i16> %vecins, <vscale x 8 x i16>* %q
  ret void
}
63
; Negative test: i4 elements are not byte-sized, so a single-element store
; cannot be formed; the vector sequence is preserved.
define void @insert_store_v9i4(<9 x i4>* %q, i4 zeroext %s) {
; CHECK-LABEL: @insert_store_v9i4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <9 x i4>, <9 x i4>* [[Q:%.*]], align 8
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <9 x i4> [[TMP0]], i4 [[S:%.*]], i32 3
; CHECK-NEXT:    store <9 x i4> [[VECINS]], <9 x i4>* [[Q]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <9 x i4>, <9 x i4>* %q
  %vecins = insertelement <9 x i4> %0, i4 %s, i32 3
  store <9 x i4> %vecins, <9 x i4>* %q, align 1
  ret void
}
78
; Negative test: i27 elements are not byte-sized either; no scalarization.
define void @insert_store_v4i27(<4 x i27>* %q, i27 zeroext %s) {
; CHECK-LABEL: @insert_store_v4i27(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i27>, <4 x i27>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x i27> [[TMP0]], i27 [[S:%.*]], i32 3
; CHECK-NEXT:    store <4 x i27> [[VECINS]], <4 x i27>* [[Q]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <4 x i27>, <4 x i27>* %q
  %vecins = insertelement <4 x i27> %0, i27 %s, i32 3
  store <4 x i27> %vecins, <4 x i27>* %q, align 1
  ret void
}
93
; Negative test: the load and the insert/store live in different basic blocks;
; per the CHECK lines the transform does not fire across the branch.
define void @insert_store_blk_differ(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_blk_differ(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    br label [[CONT:%.*]]
; CHECK:       cont:
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  br label %cont
cont:
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
  store <8 x i16> %vecins, <8 x i16>* %q
  ret void
}
112
; Negative test: a variable index with no known bound may be out of range, so
; the sequence is left untouched.
define void @insert_store_nonconst(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
127
; Verify the alignment is narrowed to the scalar store size: the assume makes
; the variable index valid, and the vector's align 128 becomes align 4 on the
; scalarized i32 store.
define void @insert_store_nonconst_large_alignment(<4 x i32>* %q, i32 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_large_alignment(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i32 [[S:%.*]], i32* [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %i = load <4 x i32>, <4 x i32>* %q, align 128
  %vecins = insertelement <4 x i32> %i, i32 %s, i32 %idx
  store <4 x i32> %vecins, <4 x i32>* %q, align 128
  ret void
}
146
; With a vector store of align 8 and a valid variable index (assume idx < 2),
; the scalarized i64 store keeps align 8 (see the CHECK lines).
define void @insert_store_nonconst_align_maximum_8(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_maximum_8(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 8
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 8
  ret void
}
162
; Same as above but with align 4 on the vector accesses; the scalarized i64
; store is emitted with align 4 (the original store's alignment).
define void @insert_store_nonconst_align_maximum_4(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_maximum_4(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 4
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 4
  ret void
}
178
; Load align 4 but store only align 2: per the CHECK lines the scalarized i64
; store uses align 4 (taken from the load side).
define void @insert_store_nonconst_align_larger(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_larger(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 4
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 2
  ret void
}
194
; Positive test: llvm.assume(idx < 4) proves the variable index in bounds, so
; the sequence is scalarized to a GEP + scalar store.
define void @insert_store_nonconst_index_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
212
declare void @maythrow() readnone

; Negative test: the assume only executes after the load and a call that may
; not return normally, so the index is not known valid at the point of the
; load; per the CHECK lines the vector sequence is kept.
define void @insert_store_nonconst_index_not_known_valid_by_assume_after_load(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume_after_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    call void @maythrow()
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  %0 = load <16 x i8>, <16 x i8>* %q
  call void @maythrow()
  call void @llvm.assume(i1 %cmp)
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
235
; Negative test: the assumed bound (idx < 17) still allows idx == 16, which is
; out of bounds for <16 x i8>, so the transform must not fire.
define void @insert_store_nonconst_index_not_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 17
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 17
  call void @llvm.assume(i1 %cmp)
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

declare void @llvm.assume(i1)
256
; Positive test: a noundef index masked with `and 7` is provably in [0, 7],
; so the sequence is scalarized without needing a freeze.
define void @insert_store_nonconst_index_known_noundef_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 7
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
272
; Positive test: freezing the base index before the `and 7` clamp also makes
; the masked index safe to use; scalarized per the CHECK lines.
define void @insert_store_nonconst_index_base_frozen_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX_FROZEN]], 7
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.frozen = freeze i32 %idx
  %idx.clamped = and i32 %idx.frozen, 7
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
290
; Negative test: here the freeze is applied AFTER the `and 7` clamp; per the
; CHECK lines the transform does not fire on the frozen result.
define void @insert_store_nonconst_index_frozen_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
; CHECK-NEXT:    [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 7
  %idx.clamped.frozen = freeze i32 %idx.clamped
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped.frozen
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
309
; Positive test: the index is clamped by `and 7` but is not noundef, so the
; transform inserts a freeze of the base index before the clamp (TMP0 below)
; and then scalarizes.
define void @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[TMP0]], 7
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP1]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 7
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
326
; Negative test: `and 16` can yield 16, which is out of bounds for <16 x i8>,
; so no scalarization happens.
define void @insert_store_nonconst_index_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
343
; Negative test: noundef does not help when the mask (`and 16`) cannot prove
; the index in bounds; the vector sequence is kept.
define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
; Positive test (urem variant): a noundef index clamped by `urem 16` is in
; [0, 15], so the sequence is scalarized.
define void @insert_store_nonconst_index_known_noundef_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
375
; Positive test: freezing the base index before the `urem 16` clamp; the
; sequence is scalarized per the CHECK lines.
define void @insert_store_nonconst_index_base_frozen_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX_FROZEN]], 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.frozen = freeze i32 %idx
  %idx.clamped = urem i32 %idx.frozen, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
393
; Negative test: the freeze is applied AFTER the `urem 16` clamp; per the
; CHECK lines the transform does not fire on the frozen result.
define void @insert_store_nonconst_index_frozen_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 16
  %idx.clamped.frozen = freeze i32 %idx.clamped
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped.frozen
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
412
; Positive test: the index is clamped by `urem 16` but is not noundef, so a
; freeze of the base index is inserted before the clamp, then scalarized.
define void @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[TMP0]], 16
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP1]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
429
; Negative test: `urem 17` can yield 16, which is out of bounds for <16 x i8>,
; so no scalarization happens.
define void @insert_store_nonconst_index_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 17
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
446
; Negative test: noundef does not help when `urem 17` cannot prove the index
; in bounds; the vector sequence is kept.
define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 17
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
463
; Positive test: the store address is the load address behind no-op casts and
; a zero GEP; the transform still recognizes the same location and scalarizes.
define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store_ptr_strip(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[ADDR0]], i64 0
; CHECK-NEXT:    [[ADDR2:%.*]] = bitcast <2 x i64>* [[ADDR1]] to <16 x i8>*
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[ADDR2]], i32 0, i32 3
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
  %addr0 = bitcast <16 x i8>* %q to <2 x i64>*
  %addr1 = getelementptr <2 x i64>, <2 x i64>* %addr0, i64 0
  %addr2 = bitcast <2 x i64>* %addr1 to <16 x i8>*
  store <16 x i8> %vecins, <16 x i8>* %addr2
  ret void
}
483
; Negative test: a volatile store (first half) or a volatile load (second
; half) blocks the transform; both sequences are kept unchanged.
define void @volatile_update(<16 x i8>* %q, <16 x i8>* %p, i8 zeroext %s) {
; CHECK-LABEL: @volatile_update(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store volatile <16 x i8> [[VECINS0]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load volatile <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[S]], i32 1
; CHECK-NEXT:    store <16 x i8> [[VECINS1]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins0 = insertelement <16 x i8> %0, i8 %s, i32 3
  store volatile <16 x i8> %vecins0, <16 x i8>* %q

  %1 = load volatile <16 x i8>, <16 x i8>* %p
  %vecins1 = insertelement <16 x i8> %1, i8 %s, i32 1
  store <16 x i8> %vecins1, <16 x i8>* %p
  ret void
}
505
; Negative test: the load address (%p) differs from the store address (%q),
; so the transform must not fire.
define void @insert_store_addr_differ(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
; CHECK-LABEL: @insert_store_addr_differ(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %ld = load <16 x i8>, <16 x i8>* %p
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %q
  ret void
}
520
; We can't transform if any instruction in between could modify the loaded
; memory. Three sub-cases: a possibly-aliasing store (blocked), a store to a
; noalias pointer (allowed), and a must-alias store (blocked).
define void @insert_store_mem_modify(<16 x i8>* %p, <16 x i8>* %q, <16 x i8>* noalias %r, i8 %s, i32 %m) {
; CHECK-LABEL: @insert_store_mem_modify(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[R:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 7
; CHECK-NEXT:    store i8 [[S]], i8* [[TMP0]], align 1
; CHECK-NEXT:    [[PTR0:%.*]] = bitcast <16 x i8>* [[P]] to <4 x i32>*
; CHECK-NEXT:    [[LD3:%.*]] = load <4 x i32>, <4 x i32>* [[PTR0]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[P]], align 16
; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x i32> [[LD3]], i32 [[M:%.*]], i32 0
; CHECK-NEXT:    store <4 x i32> [[INS3]], <4 x i32>* [[PTR0]], align 16
; CHECK-NEXT:    ret void
;
entry:
  ; p may alias q: the intervening store to q blocks the transform
  %ld = load <16 x i8>, <16 x i8>* %p
  store <16 x i8> zeroinitializer, <16 x i8>* %q
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %p

  ; p never aliases r: the intervening store cannot clobber q, so scalarize
  %ld2 = load <16 x i8>, <16 x i8>* %q
  store <16 x i8> zeroinitializer, <16 x i8>* %r
  %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
  store <16 x i8> %ins2, <16 x i8>* %q

  ; p must alias ptr0: the intervening store clobbers, blocking the transform
  %ptr0 = bitcast <16 x i8>* %p to <4 x i32>*
  %ld3 = load <4 x i32>, <4 x i32>* %ptr0
  store <16 x i8> zeroinitializer, <16 x i8>* %p
  %ins3 = insertelement <4 x i32> %ld3, i32 %m, i32 0
  store <4 x i32> %ins3, <4 x i32>* %ptr0

  ret void
}
561
; Check cases where intervening calls may modify memory: a call that may write
; (@maywrite) blocks the transform, while a readonly call (@nowrite) after a
; fresh load does not.
define void @insert_store_with_call(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
; CHECK-LABEL: @insert_store_with_call(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    call void @maywrite(<16 x i8>* [[P]])
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    call void @nowrite(<16 x i8>* [[P]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P]], i32 0, i32 7
; CHECK-NEXT:    store i8 [[S]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %ld = load <16 x i8>, <16 x i8>* %p
  call void @maywrite(<16 x i8>* %p)
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %p
  call void @foo()  ; Barrier
  %ld2 = load <16 x i8>, <16 x i8>* %p
  call void @nowrite(<16 x i8>* %p)
  %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
  store <16 x i8> %ins2, <16 x i8>* %p
  ret void
}

declare void @foo()
declare void @maywrite(<16 x i8>*)
declare void @nowrite(<16 x i8>*) readonly
592
; If the number of instructions between the load and the store exceeds the
; scan limit (default 30), the combine gives up; per the CHECK lines the
; vector insert/store at the end is left unchanged.
define i32 @insert_store_maximum_scan_instrs(i32 %arg, i16* %arg1, <16 x i8>* %arg2, i8 zeroext %arg3) {
; CHECK-LABEL: @insert_store_maximum_scan_instrs(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[I:%.*]] = or i32 [[ARG:%.*]], 1
; CHECK-NEXT:    [[I4:%.*]] = load <16 x i8>, <16 x i8>* [[ARG2:%.*]], align 16
; CHECK-NEXT:    [[I5:%.*]] = tail call i32 @bar(i32 [[I]], i1 true)
; CHECK-NEXT:    [[I6:%.*]] = shl i32 [[ARG]], [[I5]]
; CHECK-NEXT:    [[I7:%.*]] = lshr i32 [[I6]], 26
; CHECK-NEXT:    [[I8:%.*]] = trunc i32 [[I7]] to i8
; CHECK-NEXT:    [[I9:%.*]] = and i8 [[I8]], 31
; CHECK-NEXT:    [[I10:%.*]] = lshr i32 [[I6]], 11
; CHECK-NEXT:    [[I11:%.*]] = and i32 [[I10]], 32767
; CHECK-NEXT:    [[I12:%.*]] = zext i8 [[I9]] to i64
; CHECK-NEXT:    [[I13:%.*]] = getelementptr inbounds i16, i16* [[ARG1:%.*]], i64 [[I12]]
; CHECK-NEXT:    [[I14:%.*]] = load i16, i16* [[I13]], align 2
; CHECK-NEXT:    [[I15:%.*]] = zext i16 [[I14]] to i32
; CHECK-NEXT:    [[I16:%.*]] = add nuw nsw i8 [[I9]], 1
; CHECK-NEXT:    [[I17:%.*]] = zext i8 [[I16]] to i64
; CHECK-NEXT:    [[I18:%.*]] = getelementptr inbounds i16, i16* [[ARG1]], i64 [[I17]]
; CHECK-NEXT:    [[I19:%.*]] = load i16, i16* [[I18]], align 2
; CHECK-NEXT:    [[I20:%.*]] = zext i16 [[I19]] to i32
; CHECK-NEXT:    [[I21:%.*]] = sub nsw i32 [[I20]], [[I15]]
; CHECK-NEXT:    [[I22:%.*]] = mul nsw i32 [[I11]], [[I21]]
; CHECK-NEXT:    [[I23:%.*]] = ashr i32 [[I22]], 15
; CHECK-NEXT:    [[I24:%.*]] = shl nuw nsw i32 [[I5]], 15
; CHECK-NEXT:    [[I25:%.*]] = xor i32 [[I24]], 1015808
; CHECK-NEXT:    [[I26:%.*]] = add nuw nsw i32 [[I25]], [[I15]]
; CHECK-NEXT:    [[I27:%.*]] = add nsw i32 [[I26]], [[I23]]
; CHECK-NEXT:    [[I28:%.*]] = sitofp i32 [[ARG]] to double
; CHECK-NEXT:    [[I29:%.*]] = tail call double @llvm.log2.f64(double [[I28]])
; CHECK-NEXT:    [[I30:%.*]] = fptosi double [[I29]] to i32
; CHECK-NEXT:    [[I31:%.*]] = shl nsw i32 [[I30]], 15
; CHECK-NEXT:    [[I32:%.*]] = or i32 [[I31]], 4
; CHECK-NEXT:    [[I33:%.*]] = icmp eq i32 [[I27]], [[I32]]
; CHECK-NEXT:    [[I34:%.*]] = select i1 [[I33]], i32 [[ARG]], i32 [[I31]]
; CHECK-NEXT:    [[I35:%.*]] = lshr i32 [[I34]], 1
; CHECK-NEXT:    [[I36:%.*]] = insertelement <16 x i8> [[I4]], i8 [[ARG3:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[I36]], <16 x i8>* [[ARG2]], align 16
; CHECK-NEXT:    ret i32 [[I35]]
;
bb:
  %i = or i32 %arg, 1
  %i4 = load <16 x i8>, <16 x i8>* %arg2, align 16
  %i5 = tail call i32 @bar(i32 %i, i1 true)
  %i6 = shl i32 %arg, %i5
  %i7 = lshr i32 %i6, 26
  %i8 = trunc i32 %i7 to i8
  %i9 = and i8 %i8, 31
  %i10 = lshr i32 %i6, 11
  %i11 = and i32 %i10, 32767
  %i12 = zext i8 %i9 to i64
  %i13 = getelementptr inbounds i16, i16* %arg1, i64 %i12
  %i14 = load i16, i16* %i13, align 2
  %i15 = zext i16 %i14 to i32
  %i16 = add nuw nsw i8 %i9, 1
  %i17 = zext i8 %i16 to i64
  %i18 = getelementptr inbounds i16, i16* %arg1, i64 %i17
  %i19 = load i16, i16* %i18, align 2
  %i20 = zext i16 %i19 to i32
  %i21 = sub nsw i32 %i20, %i15
  %i22 = mul nsw i32 %i11, %i21
  %i23 = ashr i32 %i22, 15
  %i24 = shl nuw nsw i32 %i5, 15
  %i25 = xor i32 %i24, 1015808
  %i26 = add nuw nsw i32 %i25, %i15
  %i27 = add nsw i32 %i26, %i23
  %i28 = sitofp i32 %arg to double
  %i29 = tail call double @llvm.log2.f64(double %i28)
  %i30 = fptosi double %i29 to i32
  %i31 = shl nsw i32 %i30, 15
  %i32 = or i32 %i31, 4
  %i33 = icmp eq i32 %i27, %i32
  %i34 = select i1 %i33, i32 %arg, i32 %i31
  %i35 = lshr i32 %i34, 1
  %i36 = insertelement <16 x i8> %i4, i8 %arg3, i32 3
  store <16 x i8> %i36, <16 x i8>* %arg2, align 16
  ret i32 %i35
}

declare i32 @bar(i32, i1) readonly
declare double @llvm.log2.f64(double)
676