; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -vector-combine -data-layout=e < %s | FileCheck %s
; RUN: opt -S -vector-combine -data-layout=E < %s | FileCheck %s

; Basic case: insert at a constant in-bounds index into a loaded vector that
; is stored back to the same address is narrowed to a single scalar store.
define void @insert_store(<16 x i8>* %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 3
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
  store <16 x i8> %vecins, <16 x i8>* %q, align 16
  ret void
}

; Underaligned vector store: the narrowed scalar store keeps the natural
; alignment of the element type (2 for i16), not the vector's align 1.
define void @insert_store_i16_align1(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_i16_align1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[Q:%.*]], i32 0, i32 3
; CHECK-NEXT:    store i16 [[S:%.*]], i16* [[TMP0]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
  store <8 x i16> %vecins, <8 x i16>* %q, align 1
  ret void
}

; To verify case when index is out of bounds: no transform is performed.
define void @insert_store_outofbounds(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_outofbounds(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 9
  store <8 x i16> %vecins, <8 x i16>* %q
  ret void
}

; Scalable vectors are not transformed (element count unknown at compile time).
define void @insert_store_vscale(<vscale x 8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_vscale(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x i16>, <vscale x 8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
; CHECK-NEXT:    store <vscale x 8 x i16> [[VECINS]], <vscale x 8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <vscale x 8 x i16>, <vscale x 8 x i16>* %q
  %vecins = insertelement <vscale x 8 x i16> %0, i16 %s, i32 3
  store <vscale x 8 x i16> %vecins, <vscale x 8 x i16>* %q
  ret void
}

; Non-byte-sized elements (i4) cannot be stored individually: no transform.
define void @insert_store_v9i4(<9 x i4>* %q, i4 zeroext %s) {
; CHECK-LABEL: @insert_store_v9i4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <9 x i4>, <9 x i4>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <9 x i4> [[TMP0]], i4 [[S:%.*]], i32 3
; CHECK-NEXT:    store <9 x i4> [[VECINS]], <9 x i4>* [[Q]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <9 x i4>, <9 x i4>* %q
  %vecins = insertelement <9 x i4> %0, i4 %s, i32 3
  store <9 x i4> %vecins, <9 x i4>* %q, align 1
  ret void
}

; Non-byte-sized elements (i27) cannot be stored individually: no transform.
define void @insert_store_v4i27(<4 x i27>* %q, i27 zeroext %s) {
; CHECK-LABEL: @insert_store_v4i27(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i27>, <4 x i27>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x i27> [[TMP0]], i27 [[S:%.*]], i32 3
; CHECK-NEXT:    store <4 x i27> [[VECINS]], <4 x i27>* [[Q]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <4 x i27>, <4 x i27>* %q
  %vecins = insertelement <4 x i27> %0, i27 %s, i32 3
  store <4 x i27> %vecins, <4 x i27>* %q, align 1
  ret void
}

; Load and store live in different basic blocks: no transform.
define void @insert_store_blk_differ(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_blk_differ(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    br label [[CONT:%.*]]
; CHECK:       cont:
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  br label %cont
cont:
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
  store <8 x i16> %vecins, <8 x i16>* %q
  ret void
}

; Variable index with no bound known on %idx: no transform.
define void @insert_store_nonconst(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

; To verify align here is narrowed to scalar store size
define void @insert_store_nonconst_large_alignment(<4 x i32>* %q, i32 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_large_alignment(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i32 [[S:%.*]], i32* [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %i = load <4 x i32>, <4 x i32>* %q, align 128
  %vecins = insertelement <4 x i32> %i, i32 %s, i32 %idx
  store <4 x i32> %vecins, <4 x i32>* %q, align 128
  ret void
}

; Variable index with align 8 on the vector store: scalar store keeps align 8.
define void @insert_store_nonconst_align_maximum_8(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_maximum_8(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 8
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 8
  ret void
}

; Variable index with align 4 on the vector store: scalar store keeps align 4.
define void @insert_store_nonconst_align_maximum_4(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_maximum_4(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 4
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 4
  ret void
}

; Vector store align (2) smaller than the load align (4): the narrowed scalar
; store takes the safer computed alignment (4).
define void @insert_store_nonconst_align_larger(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_larger(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 4
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 2
  ret void
}

; Variable index proven in-bounds by llvm.assume before the load: transform ok.
define void @insert_store_nonconst_index_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

declare void @maythrow() readnone

; The assume executes only after a possibly-throwing call that follows the
; load, so the bound is not known to hold at the load: no transform.
define void @insert_store_nonconst_index_not_known_valid_by_assume_after_load(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume_after_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    call void @maythrow()
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  %0 = load <16 x i8>, <16 x i8>* %q
  call void @maythrow()
  call void @llvm.assume(i1 %cmp)
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

; Assumed bound (17) exceeds the element count (16): no transform.
define void @insert_store_nonconst_index_not_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 17
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 17
  call void @llvm.assume(i1 %cmp)
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

declare void @llvm.assume(i1)

; Index masked with `and 7` is provably < 16: transform ok.
define void @insert_store_nonconst_index_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 7
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

; Mask `and 16` still permits index 16, which is out of bounds: no transform.
define void @insert_store_nonconst_index_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

; Index reduced with `urem 16` is provably < 16: transform ok.
define void @insert_store_nonconst_index_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

; `urem 17` still permits index 16, which is out of bounds: no transform.
define void @insert_store_nonconst_index_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 17
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

; Store address equals the load address after stripping bitcasts/zero-GEPs:
; transform ok.
define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store_ptr_strip(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 3
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
  %addr0 = bitcast <16 x i8>* %q to <2 x i64>*
  %addr1 = getelementptr <2 x i64>, <2 x i64>* %addr0, i64 0
  %addr2 = bitcast <2 x i64>* %addr1 to <16 x i8>*
  store <16 x i8> %vecins, <16 x i8>* %addr2
  ret void
}

; Volatile load or volatile store blocks the transform in either position.
define void @volatile_update(<16 x i8>* %q, <16 x i8>* %p, i8 zeroext %s) {
; CHECK-LABEL: @volatile_update(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store volatile <16 x i8> [[VECINS0]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load volatile <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[S]], i32 1
; CHECK-NEXT:    store <16 x i8> [[VECINS1]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins0 = insertelement <16 x i8> %0, i8 %s, i32 3
  store volatile <16 x i8> %vecins0, <16 x i8>* %q

  %1 = load volatile <16 x i8>, <16 x i8>* %p
  %vecins1 = insertelement <16 x i8> %1, i8 %s, i32 1
  store <16 x i8> %vecins1, <16 x i8>* %p
  ret void
}

; Load and store use different pointers: no transform.
define void @insert_store_addr_differ(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
; CHECK-LABEL: @insert_store_addr_differ(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %ld = load <16 x i8>, <16 x i8>* %p
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %q
  ret void
}

; We can't transform if any instr could modify memory in between.
define void @insert_store_mem_modify(<16 x i8>* %p, <16 x i8>* %q, <16 x i8>* noalias %r, i8 %s, i32 %m) {
; CHECK-LABEL: @insert_store_mem_modify(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[R:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 7
; CHECK-NEXT:    store i8 [[S]], i8* [[TMP0]], align 1
; CHECK-NEXT:    [[PTR0:%.*]] = bitcast <16 x i8>* [[P]] to <4 x i32>*
; CHECK-NEXT:    [[LD3:%.*]] = load <4 x i32>, <4 x i32>* [[PTR0]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[P]], align 16
; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x i32> [[LD3]], i32 [[M:%.*]], i32 0
; CHECK-NEXT:    store <4 x i32> [[INS3]], <4 x i32>* [[PTR0]], align 16
; CHECK-NEXT:    ret void
;
entry:
  ; p may alias q
  %ld = load <16 x i8>, <16 x i8>* %p
  store <16 x i8> zeroinitializer, <16 x i8>* %q
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %p

  ; p never aliases r
  %ld2 = load <16 x i8>, <16 x i8>* %q
  store <16 x i8> zeroinitializer, <16 x i8>* %r
  %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
  store <16 x i8> %ins2, <16 x i8>* %q

  ; p must alias ptr0
  %ptr0 = bitcast <16 x i8>* %p to <4 x i32>*
  %ld3 = load <4 x i32>, <4 x i32>* %ptr0
  store <16 x i8> zeroinitializer, <16 x i8>* %p
  %ins3 = insertelement <4 x i32> %ld3, i32 %m, i32 0
  store <4 x i32> %ins3, <4 x i32>* %ptr0

  ret void
}

; Check cases when calls may modify memory
define void @insert_store_with_call(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
; CHECK-LABEL: @insert_store_with_call(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    call void @maywrite(<16 x i8>* [[P]])
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    call void @nowrite(<16 x i8>* [[P]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P]], i32 0, i32 7
; CHECK-NEXT:    store i8 [[S]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %ld = load <16 x i8>, <16 x i8>* %p
  call void @maywrite(<16 x i8>* %p)
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %p
  call void @foo()  ; Barrier
  %ld2 = load <16 x i8>, <16 x i8>* %p
  call void @nowrite(<16 x i8>* %p)
  %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
  store <16 x i8> %ins2, <16 x i8>* %p
  ret void
}

declare void @foo()
declare void @maywrite(<16 x i8>*)
declare void @nowrite(<16 x i8>*) readonly

; To test if number of instructions in-between exceeds the limit (default 30),
; the combine will quit.
define i32 @insert_store_maximum_scan_instrs(i32 %arg, i16* %arg1, <16 x i8>* %arg2, i8 zeroext %arg3) {
; CHECK-LABEL: @insert_store_maximum_scan_instrs(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[I:%.*]] = or i32 [[ARG:%.*]], 1
; CHECK-NEXT:    [[I4:%.*]] = load <16 x i8>, <16 x i8>* [[ARG2:%.*]], align 16
; CHECK-NEXT:    [[I5:%.*]] = tail call i32 @bar(i32 [[I]], i1 true)
; CHECK-NEXT:    [[I6:%.*]] = shl i32 [[ARG]], [[I5]]
; CHECK-NEXT:    [[I7:%.*]] = lshr i32 [[I6]], 26
; CHECK-NEXT:    [[I8:%.*]] = trunc i32 [[I7]] to i8
; CHECK-NEXT:    [[I9:%.*]] = and i8 [[I8]], 31
; CHECK-NEXT:    [[I10:%.*]] = lshr i32 [[I6]], 11
; CHECK-NEXT:    [[I11:%.*]] = and i32 [[I10]], 32767
; CHECK-NEXT:    [[I12:%.*]] = zext i8 [[I9]] to i64
; CHECK-NEXT:    [[I13:%.*]] = getelementptr inbounds i16, i16* [[ARG1:%.*]], i64 [[I12]]
; CHECK-NEXT:    [[I14:%.*]] = load i16, i16* [[I13]], align 2
; CHECK-NEXT:    [[I15:%.*]] = zext i16 [[I14]] to i32
; CHECK-NEXT:    [[I16:%.*]] = add nuw nsw i8 [[I9]], 1
; CHECK-NEXT:    [[I17:%.*]] = zext i8 [[I16]] to i64
; CHECK-NEXT:    [[I18:%.*]] = getelementptr inbounds i16, i16* [[ARG1]], i64 [[I17]]
; CHECK-NEXT:    [[I19:%.*]] = load i16, i16* [[I18]], align 2
; CHECK-NEXT:    [[I20:%.*]] = zext i16 [[I19]] to i32
; CHECK-NEXT:    [[I21:%.*]] = sub nsw i32 [[I20]], [[I15]]
; CHECK-NEXT:    [[I22:%.*]] = mul nsw i32 [[I11]], [[I21]]
; CHECK-NEXT:    [[I23:%.*]] = ashr i32 [[I22]], 15
; CHECK-NEXT:    [[I24:%.*]] = shl nuw nsw i32 [[I5]], 15
; CHECK-NEXT:    [[I25:%.*]] = xor i32 [[I24]], 1015808
; CHECK-NEXT:    [[I26:%.*]] = add nuw nsw i32 [[I25]], [[I15]]
; CHECK-NEXT:    [[I27:%.*]] = add nsw i32 [[I26]], [[I23]]
; CHECK-NEXT:    [[I28:%.*]] = sitofp i32 [[ARG]] to double
; CHECK-NEXT:    [[I29:%.*]] = tail call double @llvm.log2.f64(double [[I28]])
; CHECK-NEXT:    [[I30:%.*]] = fptosi double [[I29]] to i32
; CHECK-NEXT:    [[I31:%.*]] = shl nsw i32 [[I30]], 15
; CHECK-NEXT:    [[I32:%.*]] = or i32 [[I31]], 4
; CHECK-NEXT:    [[I33:%.*]] = icmp eq i32 [[I27]], [[I32]]
; CHECK-NEXT:    [[I34:%.*]] = select i1 [[I33]], i32 [[ARG]], i32 [[I31]]
; CHECK-NEXT:    [[I35:%.*]] = lshr i32 [[I34]], 1
; CHECK-NEXT:    [[I36:%.*]] = insertelement <16 x i8> [[I4]], i8 [[ARG3:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[I36]], <16 x i8>* [[ARG2]], align 16
; CHECK-NEXT:    ret i32 [[I35]]
;
bb:
  %i = or i32 %arg, 1
  %i4 = load <16 x i8>, <16 x i8>* %arg2, align 16
  %i5 = tail call i32 @bar(i32 %i, i1 true)
  %i6 = shl i32 %arg, %i5
  %i7 = lshr i32 %i6, 26
  %i8 = trunc i32 %i7 to i8
  %i9 = and i8 %i8, 31
  %i10 = lshr i32 %i6, 11
  %i11 = and i32 %i10, 32767
  %i12 = zext i8 %i9 to i64
  %i13 = getelementptr inbounds i16, i16* %arg1, i64 %i12
  %i14 = load i16, i16* %i13, align 2
  %i15 = zext i16 %i14 to i32
  %i16 = add nuw nsw i8 %i9, 1
  %i17 = zext i8 %i16 to i64
  %i18 = getelementptr inbounds i16, i16* %arg1, i64 %i17
  %i19 = load i16, i16* %i18, align 2
  %i20 = zext i16 %i19 to i32
  %i21 = sub nsw i32 %i20, %i15
  %i22 = mul nsw i32 %i11, %i21
  %i23 = ashr i32 %i22, 15
  %i24 = shl nuw nsw i32 %i5, 15
  %i25 = xor i32 %i24, 1015808
  %i26 = add nuw nsw i32 %i25, %i15
  %i27 = add nsw i32 %i26, %i23
  %i28 = sitofp i32 %arg to double
  %i29 = tail call double @llvm.log2.f64(double %i28)
  %i30 = fptosi double %i29 to i32
  %i31 = shl nsw i32 %i30, 15
  %i32 = or i32 %i31, 4
  %i33 = icmp eq i32 %i27, %i32
  %i34 = select i1 %i33, i32 %arg, i32 %i31
  %i35 = lshr i32 %i34, 1
  %i36 = insertelement <16 x i8> %i4, i8 %arg3, i32 3
  store <16 x i8> %i36, <16 x i8>* %arg2, align 16
  ret i32 %i35
}

declare i32 @bar(i32, i1) readonly
declare double @llvm.log2.f64(double)
