; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s
; The command above runs the scalarizer and dce function passes with load/store
; scalarization enabled; the assertions in this file describe the printed IR.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; External function that takes and returns a whole <4 x float>. @f1 calls it;
; in @f1's expected output the argument vector is rebuilt with insertelement
; before the call and the result is split with extractelement afterwards.
declare <4 x float> @ext(<4 x float>)
; Zero-initialized <4 x float> global.
@g = global <4 x float> zeroinitializer

; Loop that carries a <4 x float> accumulator in a phi. Each iteration loads a
; vector from %base[%i], mixes it with the accumulator through <2 x double>
; bitcasts and shuffles, adds the two shuffled vectors, passes the sum to @ext,
; and then compares/selects the call result against constant vectors before
; storing it back through the same pointer.
; In the expected output the phi, load, fadd, fcmp, select and store are all
; split into four float operations. The vector is only rebuilt (insertelement)
; to form the argument of @ext, and the call result is split again with
; extractelement. The element loads/stores carry alignments 16, 4, 8, 4.
define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
; CHECK-LABEL: @f1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i32 0
; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i32 1
; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i32 2
; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i32 3
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]]
; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16
; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4
; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8
; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4
; CHECK-NEXT:    [[ADD_I0:%.*]] = fadd float [[VAL_I0]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]]
; CHECK-NEXT:    [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i32 0
; CHECK-NEXT:    [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i32 1
; CHECK-NEXT:    [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i32 2
; CHECK-NEXT:    [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i32 3
; CHECK-NEXT:    [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]])
; CHECK-NEXT:    [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0
; CHECK-NEXT:    [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
; CHECK-NEXT:    [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1
; CHECK-NEXT:    [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
; CHECK-NEXT:    [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2
; CHECK-NEXT:    [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
; CHECK-NEXT:    [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3
; CHECK-NEXT:    [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
; CHECK-NEXT:    store float [[SEL_I0]], float* [[PTR_I0]], align 16
; CHECK-NEXT:    store float [[SEL_I1]], float* [[PTR_I1]], align 4
; CHECK-NEXT:    store float [[SEL_I2]], float* [[PTR_I2]], align 8
; CHECK-NEXT:    store float [[SEL_I3]], float* [[PTR_I3]], align 4
; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
  %val = load <4 x float> , <4 x float> *%ptr
  ; View both vectors as <2 x double> so that each shuffle lane moves a pair
  ; of adjacent floats.
  %dval = bitcast <4 x float> %val to <2 x double>
  %dacc = bitcast <4 x float> %acc to <2 x double>
  ; Mask <0, 2>: floats 0-1 of %val followed by floats 0-1 of %acc.
  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
  <2 x i32> <i32 0, i32 2>
  ; Mask <1, 3>: floats 2-3 of %val followed by floats 2-3 of %acc.
  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
  <2 x i32> <i32 1, i32 3>
  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
  ; Lanes 0-1 add elements of %val to each other, lanes 2-3 add elements of
  ; %acc to each other; the expected output expresses this directly on the
  ; scalar pieces without any bitcast or shuffle.
  %add = fadd <4 x float> %f1, %f2
  %call = call <4 x float> @ext(<4 x float> %add)
  %cmp = fcmp ogt <4 x float> %call,
  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
  <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, <4 x float> *%ptr

  ; The back edge is taken when %nexti equals zero.
  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Integer counterpart of the loop above: a <4 x i32> accumulator phi combined
; with a <4 x i8> load that is sign-extended, added, compared against a
; constant vector, selected against a splat of the loop counter, truncated and
; stored back.
; In the expected output every operation is split into four scalar ones, the
; splat of %i disappears (each select uses the scalar counter directly), and
; the element loads/stores carry alignments 4, 1, 2, 1.
define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
; CHECK-LABEL: @f2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i32 0
; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i32 1
; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i32 2
; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i32 3
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x i8>, <4 x i8>* [[BASE:%.*]], i32 [[I]]
; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <4 x i8>* [[PTR]] to i8*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i8, i8* [[PTR_I0]], align 4
; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i8, i8* [[PTR_I1]], align 1
; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i8, i8* [[PTR_I2]], align 2
; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i8, i8* [[PTR_I3]], align 1
; CHECK-NEXT:    [[EXT_I0:%.*]] = sext i8 [[VAL_I0]] to i32
; CHECK-NEXT:    [[EXT_I1:%.*]] = sext i8 [[VAL_I1]] to i32
; CHECK-NEXT:    [[EXT_I2:%.*]] = sext i8 [[VAL_I2]] to i32
; CHECK-NEXT:    [[EXT_I3:%.*]] = sext i8 [[VAL_I3]] to i32
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[EXT_I0]], [[ACC_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[EXT_I1]], [[ACC_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[EXT_I2]], [[ACC_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[EXT_I3]], [[ACC_I3]]
; CHECK-NEXT:    [[CMP_I0:%.*]] = icmp slt i32 [[ADD_I0]], -10
; CHECK-NEXT:    [[CMP_I1:%.*]] = icmp slt i32 [[ADD_I1]], -11
; CHECK-NEXT:    [[CMP_I2:%.*]] = icmp slt i32 [[ADD_I2]], -12
; CHECK-NEXT:    [[CMP_I3:%.*]] = icmp slt i32 [[ADD_I3]], -13
; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], i32 [[ADD_I0]], i32 [[I]]
; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], i32 [[ADD_I1]], i32 [[I]]
; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], i32 [[ADD_I2]], i32 [[I]]
; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], i32 [[ADD_I3]], i32 [[I]]
; CHECK-NEXT:    [[TRUNC_I0:%.*]] = trunc i32 [[SEL_I0]] to i8
; CHECK-NEXT:    [[TRUNC_I1:%.*]] = trunc i32 [[SEL_I1]] to i8
; CHECK-NEXT:    [[TRUNC_I2:%.*]] = trunc i32 [[SEL_I2]] to i8
; CHECK-NEXT:    [[TRUNC_I3:%.*]] = trunc i32 [[SEL_I3]] to i8
; CHECK-NEXT:    store i8 [[TRUNC_I0]], i8* [[PTR_I0]], align 4
; CHECK-NEXT:    store i8 [[TRUNC_I1]], i8* [[PTR_I1]], align 1
; CHECK-NEXT:    store i8 [[TRUNC_I2]], i8* [[PTR_I2]], align 2
; CHECK-NEXT:    store i8 [[TRUNC_I3]], i8* [[PTR_I3]], align 1
; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x i8>, <4 x i8> *%base, i32 %i
  %val = load <4 x i8> , <4 x i8> *%ptr
  %ext = sext <4 x i8> %val to <4 x i32>
  %add = add <4 x i32> %ext, %acc
  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
  ; Splat %i into all four lanes: insert it at lane 0, then shuffle with an
  ; all-zero mask so every result lane reads lane 0.
  %single = insertelement <4 x i32> undef, i32 %i, i32 0
  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
  <4 x i32> zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
  %trunc = trunc <4 x i32> %sel to <4 x i8>
  store <4 x i8> %trunc, <4 x i8> *%ptr

  ; The back edge is taken when %nexti equals zero.
  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that !tbaa information is preserved.
; The vector load carries !tbaa !1 and the vector store !tbaa !2 (both nodes
; are defined outside this function). In the expected output each of the four
; scalar loads carries one !tbaa node and each of the four scalar stores
; carries another; the two nodes are captured as TBAA0 and TBAA3.
define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f3(
; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa [[TBAA3:![0-9]+]]
; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa [[TBAA3]]
; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    ret void
;
  %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
  ret void
}

; Check that !tbaa.struct information is preserved.
; Both the vector load and the vector store carry !tbaa.struct !5 (the node is
; defined outside this function); the expected output shows the same
; attachment on every scalar load and store.
define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f4(
; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa.struct !5
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa.struct !5
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa.struct !5
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa.struct !5
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa.struct !5
; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa.struct !5
; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa.struct !5
; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa.struct !5
; CHECK-NEXT:    ret void
;
  %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
  ret void
}

; Check that llvm.access.group information is preserved.
; The vector load and store inside the loop carry !llvm.access.group !13 and
; the loop branch carries !llvm.loop !3 (both nodes are defined outside this
; function). In the expected output every scalar load and store carries an
; !llvm.access.group attachment and the branch keeps its !llvm.loop
; attachment. The expected output refers to the access group as !6 rather than
; !13, presumably because metadata is renumbered when the module is printed.
define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[THIS_SRC:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[SRC:%.*]], i32 [[INDEX]]
; CHECK-NEXT:    [[THIS_SRC_I0:%.*]] = bitcast <4 x i32>* [[THIS_SRC]] to i32*
; CHECK-NEXT:    [[THIS_SRC_I1:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 1
; CHECK-NEXT:    [[THIS_SRC_I2:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 2
; CHECK-NEXT:    [[THIS_SRC_I3:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 3
; CHECK-NEXT:    [[THIS_DST:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[DST:%.*]], i32 [[INDEX]]
; CHECK-NEXT:    [[THIS_DST_I0:%.*]] = bitcast <4 x i32>* [[THIS_DST]] to i32*
; CHECK-NEXT:    [[THIS_DST_I1:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 1
; CHECK-NEXT:    [[THIS_DST_I2:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 2
; CHECK-NEXT:    [[THIS_DST_I3:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 3
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[THIS_SRC_I0]], align 16, !llvm.access.group !6
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[THIS_SRC_I1]], align 4, !llvm.access.group !6
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[THIS_SRC_I2]], align 8, !llvm.access.group !6
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[THIS_SRC_I3]], align 4, !llvm.access.group !6
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[THIS_DST_I0]], align 16, !llvm.access.group !6
; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[THIS_DST_I1]], align 4, !llvm.access.group !6
; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[THIS_DST_I2]], align 8, !llvm.access.group !6
; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[THIS_DST_I3]], align 4, !llvm.access.group !6
; CHECK-NEXT:    [[NEXT_INDEX]] = add i32 [[INDEX]], -1
; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]]
; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
  %this_src = getelementptr <4 x i32>, <4 x i32> *%src, i32 %index
  %this_dst = getelementptr <4 x i32>, <4 x i32> *%dst, i32 %index
  %val = load <4 x i32> , <4 x i32> *%this_src, !llvm.access.group !13
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.access.group !13
  ; The index starts at 0 and is decremented; the loop repeats until it
  ; equals %count.
  %next_index = add i32 %index, -1
  %continue = icmp ne i32 %next_index, %count
  br i1 %continue, label %loop, label %end, !llvm.loop !3

end:
  ret void
}

; Check that fpmath information is preserved.
; The vector fadd carries !fpmath !4 (the node is defined outside this
; function). In the expected output each of the four scalar fadds carries an
; !fpmath attachment, printed there as !9, and the returned vector is rebuilt
; with insertelement.
define <4 x float> @f6(<4 x float> %x) {
; CHECK-LABEL: @f6(
; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
; CHECK-NEXT:    [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9
; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <4 x float> [[X]], i32 1
; CHECK-NEXT:    [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9
; CHECK-NEXT:    [[X_I2:%.*]] = extractelement <4 x float> [[X]], i32 2
; CHECK-NEXT:    [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9
; CHECK-NEXT:    [[X_I3:%.*]] = extractelement <4 x float> [[X]], i32 3
; CHECK-NEXT:    [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9
; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i32 0
; CHECK-NEXT:    [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
; CHECK-NEXT:    [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i32 2
; CHECK-NEXT:    [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i32 3
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
  !fpmath !4
  ret <4 x float> %res
}

; Check that random metadata isn't kept.
; The vector load and store carry a !foo attachment (node !5, defined outside
; this function); the expected scalar loads and stores carry no metadata
; attachment at all.
define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f7(
; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16
; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4
; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8
; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4
; CHECK-NEXT:    ret void
;
  %val = load <4 x i32> , <4 x i32> *%src, !foo !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !foo !5
  ret void
}

; Test GEP with vectors.
; A getelementptr over a vector of pointers and a vector of indices, where
; some lanes of each operand were overwritten with insertelement beforehand:
; index lanes 0 and 2 hold the constant 100 and pointer lane 1 holds %other.
; The expected output has one scalar getelementptr per lane that uses the
; inserted value directly (100 or %other) and extracts from the incoming
; vectors only the lanes that were not overwritten. The four pointer stores
; carry alignments 32, 8, 16, 8.
; Note that the function signature continues after the assertion block below.
define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
; CHECK-LABEL: @f8(
; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float**
; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1
; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2
; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3
; CHECK-NEXT:    [[PTR0_I0:%.*]] = extractelement <4 x float*> [[PTR0:%.*]], i32 0
; CHECK-NEXT:    [[PTR0_I2:%.*]] = extractelement <4 x float*> [[PTR0]], i32 2
; CHECK-NEXT:    [[PTR0_I3:%.*]] = extractelement <4 x float*> [[PTR0]], i32 3
; CHECK-NEXT:    [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i32 1
; CHECK-NEXT:    [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i32 3
; CHECK-NEXT:    [[VAL_I0:%.*]] = getelementptr float, float* [[PTR0_I0]], i32 100
; CHECK-NEXT:    [[VAL_I1:%.*]] = getelementptr float, float* [[OTHER:%.*]], i32 [[I0_I1]]
; CHECK-NEXT:    [[VAL_I2:%.*]] = getelementptr float, float* [[PTR0_I2]], i32 100
; CHECK-NEXT:    [[VAL_I3:%.*]] = getelementptr float, float* [[PTR0_I3]], i32 [[I0_I3]]
; CHECK-NEXT:    store float* [[VAL_I0]], float** [[DEST_I0]], align 32
; CHECK-NEXT:    store float* [[VAL_I1]], float** [[DEST_I1]], align 8
; CHECK-NEXT:    store float* [[VAL_I2]], float** [[DEST_I2]], align 16
; CHECK-NEXT:    store float* [[VAL_I3]], float** [[DEST_I3]], align 8
; CHECK-NEXT:    ret void
;
  float *%other) {
  ; Overwrite index lanes 0 and 2 with the constant 100.
  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
  ; Overwrite pointer lane 1 with the scalar argument %other.
  %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
  %val = getelementptr float, <4 x float *> %ptr1, <4 x i32> %i2
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test the handling of unaligned loads.
; The vector load is annotated align 4 and the vector store align 8, both
; below the 16 bytes used for unannotated <4 x float> accesses elsewhere in
; this file. In the expected output all four scalar loads are align 4, and the
; scalar stores alternate align 8, 4, 8, 4.
define void @f9(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f9(
; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float*
; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1
; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2
; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3
; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 4
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 4
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 4
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 4
; CHECK-NEXT:    store float [[VAL_I0]], float* [[DEST_I0]], align 8
; CHECK-NEXT:    store float [[VAL_I1]], float* [[DEST_I1]], align 4
; CHECK-NEXT:    store float [[VAL_I2]], float* [[DEST_I2]], align 8
; CHECK-NEXT:    store float [[VAL_I3]], float* [[DEST_I3]], align 4
; CHECK-NEXT:    ret void
;
  %val = load <4 x float> , <4 x float> *%src, align 4
  store <4 x float> %val, <4 x float> *%dest, align 8
  ret void
}

; ...and again with subelement alignment.
; Here the annotated alignments (1 for the load, 2 for the store) are smaller
; than the 4-byte float element. In the expected output every scalar load is
; align 1 and every scalar store is align 2.
define void @f10(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f10(
; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float*
; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1
; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2
; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3
; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float*
; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 1
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 1
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 1
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 1
; CHECK-NEXT:    store float [[VAL_I0]], float* [[DEST_I0]], align 2
; CHECK-NEXT:    store float [[VAL_I1]], float* [[DEST_I1]], align 2
; CHECK-NEXT:    store float [[VAL_I2]], float* [[DEST_I2]], align 2
; CHECK-NEXT:    store float [[VAL_I3]], float* [[DEST_I3]], align 2
; CHECK-NEXT:    ret void
;
  %val = load <4 x float> , <4 x float> *%src, align 1
  store <4 x float> %val, <4 x float> *%dest, align 2
  ret void
}
425
426; Test that sub-byte loads aren't scalarized.
427define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
428; CHECK-LABEL: @f11(
429; CHECK-NEXT:    [[SRC1:%.*]] = getelementptr <32 x i1>, <32 x i1>* [[SRC0:%.*]], i32 1
430; CHECK-NEXT:    [[VAL0:%.*]] = load <32 x i1>, <32 x i1>* [[SRC0]], align 4
431; CHECK-NEXT:    [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i32 0
432; CHECK-NEXT:    [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i32 1
433; CHECK-NEXT:    [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i32 2
434; CHECK-NEXT:    [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i32 3
435; CHECK-NEXT:    [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i32 4
436; CHECK-NEXT:    [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i32 5
437; CHECK-NEXT:    [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i32 6
438; CHECK-NEXT:    [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i32 7
439; CHECK-NEXT:    [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i32 8
440; CHECK-NEXT:    [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i32 9
441; CHECK-NEXT:    [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i32 10
442; CHECK-NEXT:    [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i32 11
443; CHECK-NEXT:    [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i32 12
444; CHECK-NEXT:    [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i32 13
445; CHECK-NEXT:    [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i32 14
446; CHECK-NEXT:    [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i32 15
447; CHECK-NEXT:    [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i32 16
448; CHECK-NEXT:    [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i32 17
449; CHECK-NEXT:    [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i32 18
450; CHECK-NEXT:    [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i32 19
451; CHECK-NEXT:    [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i32 20
452; CHECK-NEXT:    [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i32 21
453; CHECK-NEXT:    [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i32 22
454; CHECK-NEXT:    [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i32 23
455; CHECK-NEXT:    [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i32 24
456; CHECK-NEXT:    [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i32 25
457; CHECK-NEXT:    [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i32 26
458; CHECK-NEXT:    [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i32 27
459; CHECK-NEXT:    [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i32 28
460; CHECK-NEXT:    [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i32 29
461; CHECK-NEXT:    [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i32 30
462; CHECK-NEXT:    [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i32 31
463; CHECK-NEXT:    [[VAL1:%.*]] = load <32 x i1>, <32 x i1>* [[SRC1]], align 4
464; CHECK-NEXT:    [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i32 0
465; CHECK-NEXT:    [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]]
466; CHECK-NEXT:    [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i32 1
467; CHECK-NEXT:    [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]]
468; CHECK-NEXT:    [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i32 2
469; CHECK-NEXT:    [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]]
470; CHECK-NEXT:    [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i32 3
471; CHECK-NEXT:    [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]]
472; CHECK-NEXT:    [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i32 4
473; CHECK-NEXT:    [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]]
474; CHECK-NEXT:    [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i32 5
475; CHECK-NEXT:    [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]]
476; CHECK-NEXT:    [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i32 6
477; CHECK-NEXT:    [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]]
478; CHECK-NEXT:    [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i32 7
479; CHECK-NEXT:    [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]]
480; CHECK-NEXT:    [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i32 8
481; CHECK-NEXT:    [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]]
482; CHECK-NEXT:    [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i32 9
483; CHECK-NEXT:    [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]]
484; CHECK-NEXT:    [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i32 10
485; CHECK-NEXT:    [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]]
486; CHECK-NEXT:    [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i32 11
487; CHECK-NEXT:    [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]]
488; CHECK-NEXT:    [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i32 12
489; CHECK-NEXT:    [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]]
490; CHECK-NEXT:    [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i32 13
491; CHECK-NEXT:    [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]]
492; CHECK-NEXT:    [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i32 14
493; CHECK-NEXT:    [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]]
494; CHECK-NEXT:    [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i32 15
495; CHECK-NEXT:    [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]]
496; CHECK-NEXT:    [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i32 16
497; CHECK-NEXT:    [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]]
498; CHECK-NEXT:    [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i32 17
499; CHECK-NEXT:    [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]]
500; CHECK-NEXT:    [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i32 18
501; CHECK-NEXT:    [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]]
502; CHECK-NEXT:    [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i32 19
503; CHECK-NEXT:    [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]]
504; CHECK-NEXT:    [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i32 20
505; CHECK-NEXT:    [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]]
506; CHECK-NEXT:    [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i32 21
507; CHECK-NEXT:    [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]]
508; CHECK-NEXT:    [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i32 22
509; CHECK-NEXT:    [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]]
510; CHECK-NEXT:    [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i32 23
511; CHECK-NEXT:    [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]]
512; CHECK-NEXT:    [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i32 24
513; CHECK-NEXT:    [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]]
514; CHECK-NEXT:    [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i32 25
515; CHECK-NEXT:    [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]]
516; CHECK-NEXT:    [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i32 26
517; CHECK-NEXT:    [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]]
518; CHECK-NEXT:    [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i32 27
519; CHECK-NEXT:    [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]]
520; CHECK-NEXT:    [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i32 28
521; CHECK-NEXT:    [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]]
522; CHECK-NEXT:    [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i32 29
523; CHECK-NEXT:    [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]]
524; CHECK-NEXT:    [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i32 30
525; CHECK-NEXT:    [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]]
526; CHECK-NEXT:    [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i32 31
527; CHECK-NEXT:    [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]]
528; CHECK-NEXT:    [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i32 0
529; CHECK-NEXT:    [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i32 1
530; CHECK-NEXT:    [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i32 2
531; CHECK-NEXT:    [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i32 3
532; CHECK-NEXT:    [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i32 4
533; CHECK-NEXT:    [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i32 5
534; CHECK-NEXT:    [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i32 6
535; CHECK-NEXT:    [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i32 7
536; CHECK-NEXT:    [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i32 8
537; CHECK-NEXT:    [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i32 9
538; CHECK-NEXT:    [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i32 10
539; CHECK-NEXT:    [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i32 11
540; CHECK-NEXT:    [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i32 12
541; CHECK-NEXT:    [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i32 13
542; CHECK-NEXT:    [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i32 14
543; CHECK-NEXT:    [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i32 15
544; CHECK-NEXT:    [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i32 16
545; CHECK-NEXT:    [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i32 17
546; CHECK-NEXT:    [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i32 18
547; CHECK-NEXT:    [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i32 19
548; CHECK-NEXT:    [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i32 20
549; CHECK-NEXT:    [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i32 21
550; CHECK-NEXT:    [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i32 22
551; CHECK-NEXT:    [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i32 23
552; CHECK-NEXT:    [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i32 24
553; CHECK-NEXT:    [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i32 25
554; CHECK-NEXT:    [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i32 26
555; CHECK-NEXT:    [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i32 27
556; CHECK-NEXT:    [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i32 28
557; CHECK-NEXT:    [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i32 29
558; CHECK-NEXT:    [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i32 30
559; CHECK-NEXT:    [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i32 31
560; CHECK-NEXT:    store <32 x i1> [[AND]], <32 x i1>* [[DEST:%.*]], align 4
561; CHECK-NEXT:    ret void
562;
563  %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1
564  %val0 = load <32 x i1> , <32 x i1> *%src0
565  %val1 = load <32 x i1> , <32 x i1> *%src1
566  %and = and <32 x i1> %val0, %val1
567  store <32 x i1> %and, <32 x i1> *%dest
568  ret void
569}
570
571; Test vector GEPs with more than one index.
; The GEP below takes a vector of four pointers to [4 x float], a constant
; first-index vector <0, 1, 2, 3> and the variable second-index vector %i.
; The expected output is one scalar GEP per lane (first index 0..3, second
; index extracted from %i) followed by four scalar pointer stores into %dest.
; The %other parameter is not referenced anywhere in the body.
572define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
573; CHECK-LABEL: @f13(
574; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float**
575; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1
576; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2
577; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3
578; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i32 0
579; CHECK-NEXT:    [[PTR_I0:%.*]] = extractelement <4 x [4 x float]*> [[PTR:%.*]], i32 0
580; CHECK-NEXT:    [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I0]], i32 0, i32 [[I_I0]]
581; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i32 1
582; CHECK-NEXT:    [[PTR_I1:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 1
583; CHECK-NEXT:    [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I1]], i32 1, i32 [[I_I1]]
584; CHECK-NEXT:    [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i32 2
585; CHECK-NEXT:    [[PTR_I2:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 2
586; CHECK-NEXT:    [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I2]], i32 2, i32 [[I_I2]]
587; CHECK-NEXT:    [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i32 3
588; CHECK-NEXT:    [[PTR_I3:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 3
589; CHECK-NEXT:    [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I3]], i32 3, i32 [[I_I3]]
590; CHECK-NEXT:    store float* [[VAL_I0]], float** [[DEST_I0]], align 32
591; CHECK-NEXT:    store float* [[VAL_I1]], float** [[DEST_I1]], align 8
592; CHECK-NEXT:    store float* [[VAL_I2]], float** [[DEST_I2]], align 16
593; CHECK-NEXT:    store float* [[VAL_I3]], float** [[DEST_I3]], align 8
594; CHECK-NEXT:    ret void
595;
596  float *%other) {
  ; Vector-of-pointers GEP with two index operands: a constant per-lane first
  ; index and the per-lane variable second index %i.
597  %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr,
598  <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
599  <4 x i32> %i
600  store <4 x float *> %val, <4 x float *> *%dest
601  ret void
602}
603
604; Test combinations of vector and non-vector PHIs.
; The loop carries a <4 x float> PHI (%this_acc) next to an i32 PHI
; (%this_count). The expected output splits the vector PHI into four float
; PHIs and leaves the i32 PHI as is. Because @ext takes and returns a whole
; vector, the lanes are re-packed with insertelement before the call, and the
; call result is unpacked with extractelement for the per-lane fadd. The
; returned value is likewise re-packed into a <4 x float>.
605define <4 x float> @f14(<4 x float> %acc, i32 %count) {
606; CHECK-LABEL: @f14(
607; CHECK-NEXT:  entry:
608; CHECK-NEXT:    [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i32 0
609; CHECK-NEXT:    [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i32 1
610; CHECK-NEXT:    [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i32 2
611; CHECK-NEXT:    [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i32 3
612; CHECK-NEXT:    br label [[LOOP:%.*]]
613; CHECK:       loop:
614; CHECK-NEXT:    [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ]
615; CHECK-NEXT:    [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ]
616; CHECK-NEXT:    [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ]
617; CHECK-NEXT:    [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ]
618; CHECK-NEXT:    [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ]
619; CHECK-NEXT:    [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i32 0
620; CHECK-NEXT:    [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i32 1
621; CHECK-NEXT:    [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i32 2
622; CHECK-NEXT:    [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i32 3
623; CHECK-NEXT:    [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]])
624; CHECK-NEXT:    [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i32 0
625; CHECK-NEXT:    [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]]
626; CHECK-NEXT:    [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i32 1
627; CHECK-NEXT:    [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]]
628; CHECK-NEXT:    [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i32 2
629; CHECK-NEXT:    [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]]
630; CHECK-NEXT:    [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i32 3
631; CHECK-NEXT:    [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]]
632; CHECK-NEXT:    [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i32 0
633; CHECK-NEXT:    [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i32 1
634; CHECK-NEXT:    [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i32 2
635; CHECK-NEXT:    [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i32 3
636; CHECK-NEXT:    [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1
637; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0
638; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
639; CHECK:       exit:
640; CHECK-NEXT:    ret <4 x float> [[NEXT_ACC]]
641;
642entry:
643  br label %loop
644
645loop:
  ; Vector PHI and scalar PHI side by side in the same block.
646  %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
647  %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
  ; Opaque external call that consumes and produces a full vector.
648  %foo = call <4 x float> @ext(<4 x float> %this_acc)
649  %next_acc = fadd <4 x float> %this_acc, %foo
650  %next_count = sub i32 %this_count, 1
  ; As written, the back edge is taken when the decremented count equals 0.
651  %cmp = icmp eq i32 %next_count, 0
652  br i1 %cmp, label %loop, label %exit
653
654exit:
655  ret <4 x float> %next_acc
656}
657
658; Test unary operator scalarization.
; The vector fneg below is expected to become four scalar fneg instructions.
; The load and store of %ptr are also expected in per-element form (the RUN
; line passes -scalarize-load-store). The vector is re-packed only for the call
; to @ext. The %acc PHI has no users in the body and does not appear in the
; expected output (the RUN line also runs dce).
659define void @f15(<4 x float> %init, <4 x float> *%base, i32 %count) {
660; CHECK-LABEL: @f15(
661; CHECK-NEXT:  entry:
662; CHECK-NEXT:    br label [[LOOP:%.*]]
663; CHECK:       loop:
664; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
665; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
666; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]]
667; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float*
668; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16
669; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1
670; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4
671; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2
672; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8
673; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3
674; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4
675; CHECK-NEXT:    [[NEG_I0:%.*]] = fneg float [[VAL_I0]]
676; CHECK-NEXT:    [[NEG_I1:%.*]] = fneg float [[VAL_I1]]
677; CHECK-NEXT:    [[NEG_I2:%.*]] = fneg float [[VAL_I2]]
678; CHECK-NEXT:    [[NEG_I3:%.*]] = fneg float [[VAL_I3]]
679; CHECK-NEXT:    [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i32 0
680; CHECK-NEXT:    [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i32 1
681; CHECK-NEXT:    [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i32 2
682; CHECK-NEXT:    [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i32 3
683; CHECK-NEXT:    [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]])
684; CHECK-NEXT:    [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0
685; CHECK-NEXT:    [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
686; CHECK-NEXT:    [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1
687; CHECK-NEXT:    [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
688; CHECK-NEXT:    [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2
689; CHECK-NEXT:    [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
690; CHECK-NEXT:    [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3
691; CHECK-NEXT:    [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
692; CHECK-NEXT:    [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
693; CHECK-NEXT:    [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
694; CHECK-NEXT:    [[SEL_I2:%.*]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
695; CHECK-NEXT:    [[SEL_I3:%.*]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
696; CHECK-NEXT:    store float [[SEL_I0]], float* [[PTR_I0]], align 16
697; CHECK-NEXT:    store float [[SEL_I1]], float* [[PTR_I1]], align 4
698; CHECK-NEXT:    store float [[SEL_I2]], float* [[PTR_I2]], align 8
699; CHECK-NEXT:    store float [[SEL_I3]], float* [[PTR_I3]], align 4
700; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
701; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
702; CHECK:       exit:
703; CHECK-NEXT:    ret void
704;
705entry:
706  br label %loop
707
708loop:
709  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  ; %acc is never read below; only its incoming value %sel is otherwise used.
710  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
711  %nexti = sub i32 %i, 1
712
713  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
714  %val = load <4 x float> , <4 x float> *%ptr
  ; The unary operator under test.
715  %neg = fneg <4 x float> %val
716  %call = call <4 x float> @ext(<4 x float> %neg)
717  %cmp = fcmp ogt <4 x float> %call,
718  <float 1.0, float 2.0, float 3.0, float 4.0>
719  %sel = select <4 x i1> %cmp, <4 x float> %call,
720  <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
721  store <4 x float> %sel, <4 x float> *%ptr
722
723  %test = icmp eq i32 %nexti, 0
724  br i1 %test, label %loop, label %exit
725
726exit:
727  ret void
728}
729
730; Check that IR flags are preserved.
; f16: the nuw and nsw wrap flags on a vector add are expected on each of the
; two scalar adds in the output.
731define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
732; CHECK-LABEL: @f16(
733; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
734; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
735; CHECK-NEXT:    [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]]
736; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
737; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
738; CHECK-NEXT:    [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]]
739; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0
740; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
741; CHECK-NEXT:    ret <2 x i32> [[RES]]
742;
743  %res = add nuw nsw <2 x i32> %i, %j
744  ret <2 x i32> %res
745}
; f17: the exact flag on a vector sdiv is expected on each scalar sdiv in the
; output.
746define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
747; CHECK-LABEL: @f17(
748; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
749; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
750; CHECK-NEXT:    [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]]
751; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
752; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
753; CHECK-NEXT:    [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]]
754; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0
755; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
756; CHECK-NEXT:    ret <2 x i32> [[RES]]
757;
758  %res = sdiv exact <2 x i32> %i, %j
759  ret <2 x i32> %res
760}
; f18: the fast flag on a vector fadd is expected on each scalar fadd in the
; output.
761define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
762; CHECK-LABEL: @f18(
763; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
764; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
765; CHECK-NEXT:    [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]]
766; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
767; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
768; CHECK-NEXT:    [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]]
769; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
770; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
771; CHECK-NEXT:    ret <2 x float> [[RES]]
772;
773  %res = fadd fast <2 x float> %x, %y
774  ret <2 x float> %res
775}
; f19: the fast flag on a vector fneg (a unary operator) is expected on each
; scalar fneg in the output.
776define <2 x float> @f19(<2 x float> %x) {
777; CHECK-LABEL: @f19(
778; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
779; CHECK-NEXT:    [[RES_I0:%.*]] = fneg fast float [[X_I0]]
780; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
781; CHECK-NEXT:    [[RES_I1:%.*]] = fneg fast float [[X_I1]]
782; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
783; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
784; CHECK-NEXT:    ret <2 x float> [[RES]]
785;
786  %res = fneg fast <2 x float> %x
787  ret <2 x float> %res
788}
; f20: the fast flag on a vector fcmp is expected on each scalar fcmp in the
; output; the i1 results are re-packed into a <2 x i1> for the return.
789define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
790; CHECK-LABEL: @f20(
791; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
792; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
793; CHECK-NEXT:    [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]]
794; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
795; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
796; CHECK-NEXT:    [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]]
797; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i32 0
798; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i32 1
799; CHECK-NEXT:    ret <2 x i1> [[RES]]
800;
801  %res = fcmp fast ogt <2 x float> %x, %y
802  ret <2 x i1> %res
803}
; f21: the fast flag on a call to the vector sqrt intrinsic is expected on each
; call to the scalar @llvm.sqrt.f32 intrinsic in the output.
804declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
805define <2 x float> @f21(<2 x float> %x) {
806; CHECK-LABEL: @f21(
807; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
808; CHECK-NEXT:    [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]])
809; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
810; CHECK-NEXT:    [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]])
811; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
812; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
813; CHECK-NEXT:    ret <2 x float> [[RES]]
814;
815  %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
816  ret <2 x float> %res
817}
; f22: same as the sqrt case but with the three-operand fma intrinsic; each
; scalar @llvm.fma.f32 call in the output is expected to keep the fast flag.
818declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
819define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
820; CHECK-LABEL: @f22(
821; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
822; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
823; CHECK-NEXT:    [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i32 0
824; CHECK-NEXT:    [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]])
825; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
826; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
827; CHECK-NEXT:    [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i32 1
828; CHECK-NEXT:    [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]])
829; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
830; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
831; CHECK-NEXT:    ret <2 x float> [[RES]]
832;
833  %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
834  ret <2 x float> %res
835}
836
837; See https://reviews.llvm.org/D83101#2133062
; The input is already in scalar form: lane 0 is extracted from %srcvec and a
; new vector is built by inserting it and the scalar %v1 into an undef vector.
; The expected output keeps the extract and rebuilds the vector from poison.
; NOTE(review): judging by the function name and the link above, this is
; presumably a regression test for a crash on this pattern -- confirm against
; the linked review.
838define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
839; CHECK-LABEL: @f23_crash(
840; CHECK-NEXT:    [[V0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i32 0
841; CHECK-NEXT:    [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[V0]], i32 0
842; CHECK-NEXT:    [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i32 1
843; CHECK-NEXT:    ret <2 x i32> [[T1]]
844;
845  %v0 = extractelement <2 x i32> %srcvec, i32 0
846  %t0 = insertelement <2 x i32> undef, i32 %v0, i32 0
847  %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1
848  ret <2 x i32> %t1
849}
850
; Metadata nodes referenced by functions earlier in this file (their users are
; outside this part of the file).
; !0 is a node holding only the string "root"; !1 and !2 are named nodes
; ("set1", "set2") that both list !0 as their second operand.
; NOTE(review): presumably TBAA-style type descriptors -- confirm at the uses.
851!0 = !{ !"root" }
852!1 = !{ !"set1", !0 }
853!2 = !{ !"set2", !0 }
; !3 lists itself as its first operand and carries an
; "llvm.loop.parallel_accesses" entry that points at !13.
854!3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
; NOTE(review): !4 (a single float) looks like an !fpmath operand and !5
; (offset 0, size 8, null) like a !tbaa.struct operand -- verify at the uses.
855!4 = !{ float 4.0 }
856!5 = !{ i64 0, i64 8, null }
; Distinct empty node; it is the target referenced from !3 above.
857!13 = distinct !{}
858