1; RUN: opt < %s -passes='print<cost-model>' -mtriple=arm-apple-ios6.0.0 -mcpu=cortex-a8 2>&1 -disable-output | FileCheck %s --check-prefix=COST
2; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
3; Make sure that the ARM backend with NEON handles vselect.
4
; Signed max of two <4 x i32> vectors written as icmp sgt + select on the same
; operands; the result is stored through %m. The llc run expects this pattern to
; be emitted as a vmax.s32 on q registers.
5define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
6; CHECK: vmax.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
; Lane-wise %a > %b (signed); the select then picks the larger lane.
7    %cmpres = icmp sgt <4 x i32> %a, %b
8    %maxres = select <4 x i1> %cmpres, <4 x i32> %a,  <4 x i32> %b
9    store <4 x i32> %maxres, <4 x i32>* %m
10    ret void
11}
12
; Signed min of two <16 x i16> vectors (icmp slt + select on the same operands),
; loaded from %loadaddr/%loadaddr2 and stored to %storeaddr. %blend is not used
; in the body. The llc run expects two vmin.s16 instructions; the cost-model run
; expects cost 0 for the icmp and cost 4 for the select.
13%T0_10 = type <16 x i16>
14%T1_10 = type <16 x i1>
15; CHECK-LABEL: func_blend10:
16define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
17                           %T1_10* %blend, %T0_10* %storeaddr) {
18  %v0 = load %T0_10, %T0_10* %loadaddr
19  %v1 = load %T0_10, %T0_10* %loadaddr2
; Lane-wise %v0 < %v1 (signed); this mask drives the select below.
20  %c = icmp slt %T0_10 %v0, %v1
21; CHECK: vmin.s16
22; CHECK: vmin.s16
23; COST: func_blend10
24; COST: cost of 0 {{.*}} icmp
25; COST: cost of 4 {{.*}} select
26  %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
27  store %T0_10 %r, %T0_10* %storeaddr
28  ret void
29}
; Signed min of two <8 x i32> vectors (icmp slt + select on the same operands),
; loaded from %loadaddr/%loadaddr2 and stored to %storeaddr. %blend is not used
; in the body. The llc run expects two vmin.s32 instructions; the cost-model run
; expects cost 0 for the icmp and cost 4 for the select.
30%T0_14 = type <8 x i32>
31%T1_14 = type <8 x i1>
32; CHECK-LABEL: func_blend14:
33define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
34                           %T1_14* %blend, %T0_14* %storeaddr) {
35  %v0 = load %T0_14, %T0_14* %loadaddr
36  %v1 = load %T0_14, %T0_14* %loadaddr2
; Lane-wise %v0 < %v1 (signed); this mask drives the select below.
37  %c = icmp slt %T0_14 %v0, %v1
38; CHECK: vmin.s32
39; CHECK: vmin.s32
40; COST: func_blend14
41; COST: cost of 0 {{.*}} icmp
42; COST: cost of 4 {{.*}} select
43  %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
44  store %T0_14 %r, %T0_14* %storeaddr
45  ret void
46}
; Signed min of two <16 x i32> vectors (icmp slt + select on the same operands),
; loaded from %loadaddr/%loadaddr2 and stored to %storeaddr. %blend is not used
; in the body. The llc run only requires two vmin.s32 matches after the label;
; the cost-model run expects cost 0 for the icmp and cost 8 for the select.
47%T0_15 = type <16 x i32>
48%T1_15 = type <16 x i1>
49; CHECK-LABEL: func_blend15:
50define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
51                           %T1_15* %blend, %T0_15* %storeaddr) {
52; CHECK: vmin.s32
53; CHECK: vmin.s32
54  %v0 = load %T0_15, %T0_15* %loadaddr
55  %v1 = load %T0_15, %T0_15* %loadaddr2
; Lane-wise %v0 < %v1 (signed); this mask drives the select below.
56  %c = icmp slt %T0_15 %v0, %v1
57; COST: func_blend15
58; COST: cost of 0 {{.*}} icmp
59; COST: cost of 8 {{.*}} select
60  %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
61  store %T0_15 %r, %T0_15* %storeaddr
62  ret void
63}
64
65; The cost model was adjusted for the following selects. If the lowering of
66; these selects is improved, the costs expected below must be updated as well.
; Signed min of two <4 x i64> vectors (icmp slt + select on the same operands),
; loaded from %loadaddr/%loadaddr2 and stored to %storeaddr. %blend is not used
; in the body.
;
; The llc run pins the complete instruction sequence. Per that expected output,
; each 64-bit lane is compared in core registers with a subs/sbcs pair, the
; result is turned into a zero/all-ones mask (movlt + mvnne), broadcast with
; vdup.32, and the two q-register halves are blended with vbit.
; NOTE(review): the exact-sequence assertions look tool-generated (presumably
; utils/update_llc_test_checks.py) -- confirm before editing them by hand.
;
; The cost-model run expects cost 0 for the icmp and cost 21 for the select.
67%T0_18 = type <4 x i64>
68%T1_18 = type <4 x i1>
69define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
70                           %T1_18* %blend, %T0_18* %storeaddr) {
71; CHECK-LABEL: func_blend18:
72; CHECK:       @ %bb.0:
73; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
74; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
75; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]!
76; CHECK-NEXT:    vld1.64 {d22, d23}, [r0:128]!
77; CHECK-NEXT:    vmov r4, r6, d16
78; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
79; CHECK-NEXT:    vld1.64 {d20, d21}, [r0:128]
80; CHECK-NEXT:    vmov lr, r12, d18
81; CHECK-NEXT:    mov r0, #0
82; CHECK-NEXT:    vmov r2, r1, d20
83; CHECK-NEXT:    subs r2, r2, lr
84; CHECK-NEXT:    vmov r7, lr, d17
85; CHECK-NEXT:    vmov r2, r5, d22
86; CHECK-NEXT:    sbcs r1, r1, r12
87; CHECK-NEXT:    mov r1, #0
88; CHECK-NEXT:    movlt r1, #1
89; CHECK-NEXT:    cmp r1, #0
90; CHECK-NEXT:    mvnne r1, #0
91; CHECK-NEXT:    subs r2, r2, r4
92; CHECK-NEXT:    sbcs r6, r5, r6
93; CHECK-NEXT:    vmov r2, r12, d19
94; CHECK-NEXT:    vmov r5, r4, d21
95; CHECK-NEXT:    mov r6, #0
96; CHECK-NEXT:    movlt r6, #1
97; CHECK-NEXT:    cmp r6, #0
98; CHECK-NEXT:    mvnne r6, #0
99; CHECK-NEXT:    subs r2, r5, r2
100; CHECK-NEXT:    sbcs r4, r4, r12
101; CHECK-NEXT:    mov r2, #0
102; CHECK-NEXT:    vmov r4, r5, d23
103; CHECK-NEXT:    movlt r2, #1
104; CHECK-NEXT:    subs r7, r4, r7
105; CHECK-NEXT:    sbcs r7, r5, lr
106; CHECK-NEXT:    movlt r0, #1
107; CHECK-NEXT:    cmp r0, #0
108; CHECK-NEXT:    mvnne r0, #0
109; CHECK-NEXT:    cmp r2, #0
110; CHECK-NEXT:    vdup.32 d25, r0
111; CHECK-NEXT:    mvnne r2, #0
112; CHECK-NEXT:    vdup.32 d24, r6
113; CHECK-NEXT:    vdup.32 d27, r2
114; CHECK-NEXT:    vbit q8, q11, q12
115; CHECK-NEXT:    vdup.32 d26, r1
116; CHECK-NEXT:    vbit q9, q10, q13
117; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128]!
118; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
119; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, lr}
120; CHECK-NEXT:    mov pc, lr
; IR under test: load both operands, compare lanes with signed less-than, and
; select %v0 where the comparison holds, %v1 otherwise.
121  %v0 = load %T0_18, %T0_18* %loadaddr
122  %v1 = load %T0_18, %T0_18* %loadaddr2
123  %c = icmp slt %T0_18 %v0, %v1
124; COST: func_blend18
125; COST: cost of 0 {{.*}} icmp
126; COST: cost of 21 {{.*}} select
127  %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
128  store %T0_18 %r, %T0_18* %storeaddr
129  ret void
130}
; Signed min of two <8 x i64> vectors (icmp slt + select on the same operands),
; loaded from %loadaddr/%loadaddr2 and stored to %storeaddr. %blend is not used
; in the body.
;
; The llc run pins the complete instruction sequence. Per that expected output,
; the eight 64-bit lanes are compared one at a time in core registers
; (subs/sbcs), each result becomes a zero/all-ones mask (movlt + mvnne) that is
; broadcast with vdup.32, and the four q-register pairs are blended with vbit.
; NOTE(review): the exact-sequence assertions look tool-generated (presumably
; utils/update_llc_test_checks.py) -- confirm before editing them by hand.
;
; The cost-model run expects cost 0 for the icmp and cost 54 for the select.
131%T0_19 = type <8 x i64>
132%T1_19 = type <8 x i1>
133define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
134                           %T1_19* %blend, %T0_19* %storeaddr) {
135; CHECK-LABEL: func_blend19:
136; CHECK:       @ %bb.0:
137; CHECK-NEXT:    .save {r4, r5, r6, lr}
138; CHECK-NEXT:    push {r4, r5, r6, lr}
139; CHECK-NEXT:    vld1.64 {d28, d29}, [r1:128]!
140; CHECK-NEXT:    mov lr, #0
141; CHECK-NEXT:    vld1.64 {d30, d31}, [r0:128]!
142; CHECK-NEXT:    vld1.64 {d20, d21}, [r1:128]!
143; CHECK-NEXT:    vld1.64 {d24, d25}, [r0:128]!
144; CHECK-NEXT:    vld1.64 {d22, d23}, [r1:128]!
145; CHECK-NEXT:    vld1.64 {d26, d27}, [r0:128]!
146; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
147; CHECK-NEXT:    vld1.64 {d18, d19}, [r0:128]
148; CHECK-NEXT:    vmov r0, r12, d16
149; CHECK-NEXT:    vmov r1, r2, d18
150; CHECK-NEXT:    subs r0, r1, r0
151; CHECK-NEXT:    vmov r1, r4, d25
152; CHECK-NEXT:    sbcs r0, r2, r12
153; CHECK-NEXT:    mov r12, #0
154; CHECK-NEXT:    vmov r2, r0, d21
155; CHECK-NEXT:    movlt r12, #1
156; CHECK-NEXT:    cmp r12, #0
157; CHECK-NEXT:    mvnne r12, #0
158; CHECK-NEXT:    subs r1, r1, r2
159; CHECK-NEXT:    sbcs r0, r4, r0
160; CHECK-NEXT:    vmov r2, r4, d26
161; CHECK-NEXT:    mov r0, #0
162; CHECK-NEXT:    movlt r0, #1
163; CHECK-NEXT:    cmp r0, #0
164; CHECK-NEXT:    mvnne r0, #0
165; CHECK-NEXT:    vdup.32 d1, r0
166; CHECK-NEXT:    vmov r0, r1, d22
167; CHECK-NEXT:    subs r0, r2, r0
168; CHECK-NEXT:    mov r2, #0
169; CHECK-NEXT:    sbcs r0, r4, r1
170; CHECK-NEXT:    vmov r4, r5, d31
171; CHECK-NEXT:    vmov r0, r1, d29
172; CHECK-NEXT:    movlt r2, #1
173; CHECK-NEXT:    cmp r2, #0
174; CHECK-NEXT:    mvnne r2, #0
175; CHECK-NEXT:    subs r0, r4, r0
176; CHECK-NEXT:    sbcs r0, r5, r1
177; CHECK-NEXT:    vmov r4, r5, d30
178; CHECK-NEXT:    mov r0, #0
179; CHECK-NEXT:    movlt r0, #1
180; CHECK-NEXT:    cmp r0, #0
181; CHECK-NEXT:    mvnne r0, #0
182; CHECK-NEXT:    vdup.32 d3, r0
183; CHECK-NEXT:    vmov r0, r1, d28
184; CHECK-NEXT:    subs r0, r4, r0
185; CHECK-NEXT:    sbcs r0, r5, r1
186; CHECK-NEXT:    vmov r4, r5, d24
187; CHECK-NEXT:    mov r0, #0
188; CHECK-NEXT:    movlt r0, #1
189; CHECK-NEXT:    cmp r0, #0
190; CHECK-NEXT:    mvnne r0, #0
191; CHECK-NEXT:    vdup.32 d2, r0
192; CHECK-NEXT:    vmov r0, r1, d20
193; CHECK-NEXT:    vbit q14, q15, q1
194; CHECK-NEXT:    subs r0, r4, r0
195; CHECK-NEXT:    sbcs r0, r5, r1
196; CHECK-NEXT:    vmov r1, r4, d17
197; CHECK-NEXT:    vmov r5, r6, d19
198; CHECK-NEXT:    mov r0, #0
199; CHECK-NEXT:    movlt r0, #1
200; CHECK-NEXT:    cmp r0, #0
201; CHECK-NEXT:    mvnne r0, #0
202; CHECK-NEXT:    vdup.32 d0, r0
203; CHECK-NEXT:    vbit q10, q12, q0
204; CHECK-NEXT:    subs r1, r5, r1
205; CHECK-NEXT:    sbcs r1, r6, r4
206; CHECK-NEXT:    vmov r4, r5, d27
207; CHECK-NEXT:    vmov r0, r1, d23
208; CHECK-NEXT:    mov r6, #0
209; CHECK-NEXT:    movlt r6, #1
210; CHECK-NEXT:    subs r0, r4, r0
211; CHECK-NEXT:    sbcs r0, r5, r1
212; CHECK-NEXT:    movlt lr, #1
213; CHECK-NEXT:    cmp lr, #0
214; CHECK-NEXT:    mvnne lr, #0
215; CHECK-NEXT:    cmp r6, #0
216; CHECK-NEXT:    vdup.32 d31, lr
217; CHECK-NEXT:    mvnne r6, #0
218; CHECK-NEXT:    vdup.32 d30, r2
219; CHECK-NEXT:    vdup.32 d3, r6
220; CHECK-NEXT:    vbit q11, q13, q15
221; CHECK-NEXT:    vdup.32 d2, r12
222; CHECK-NEXT:    vst1.64 {d28, d29}, [r3:128]!
223; CHECK-NEXT:    vbit q8, q9, q1
224; CHECK-NEXT:    vst1.64 {d20, d21}, [r3:128]!
225; CHECK-NEXT:    vst1.64 {d22, d23}, [r3:128]!
226; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128]
227; CHECK-NEXT:    pop {r4, r5, r6, lr}
228; CHECK-NEXT:    mov pc, lr
; IR under test: load both operands, compare lanes with signed less-than, and
; select %v0 where the comparison holds, %v1 otherwise.
229  %v0 = load %T0_19, %T0_19* %loadaddr
230  %v1 = load %T0_19, %T0_19* %loadaddr2
231  %c = icmp slt %T0_19 %v0, %v1
232; COST: func_blend19
233; COST: cost of 0 {{.*}} icmp
234; COST: cost of 54 {{.*}} select
235  %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
236  store %T0_19 %r, %T0_19* %storeaddr
237  ret void
238}
; Signed min of two <16 x i64> vectors (icmp slt + select on the same operands),
; loaded from %loadaddr/%loadaddr2 and stored to %storeaddr. %blend is not used
; in the body.
;
; The llc run pins the complete instruction sequence. Per that expected output,
; d8-d11 are saved with vpush, each 64-bit lane is compared in core registers
; (subs/sbcs), the result becomes a zero/all-ones mask (movlt + mvnne) that is
; broadcast with vdup.32, and the q-register pairs are blended with a mix of
; vbsl (after a vorr copy of the mask), vbif and vbit.
; NOTE(review): the exact-sequence assertions look tool-generated (presumably
; utils/update_llc_test_checks.py) -- confirm before editing them by hand.
;
; The cost-model run expects cost 0 for the icmp and cost 108 for the select.
239%T0_20 = type <16 x i64>
240%T1_20 = type <16 x i1>
241define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
242                           %T1_20* %blend, %T0_20* %storeaddr) {
243; CHECK-LABEL: func_blend20:
244; CHECK:       @ %bb.0:
245; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
246; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
247; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
248; CHECK-NEXT:    vpush {d8, d9, d10, d11}
249; CHECK-NEXT:    mov r8, r1
250; CHECK-NEXT:    mov lr, r0
251; CHECK-NEXT:    vld1.64 {d16, d17}, [r8:128]!
252; CHECK-NEXT:    add r9, r0, #64
253; CHECK-NEXT:    add r10, r1, #64
254; CHECK-NEXT:    mov r12, #0
255; CHECK-NEXT:    vld1.64 {d22, d23}, [lr:128]!
256; CHECK-NEXT:    vld1.64 {d18, d19}, [r8:128]!
257; CHECK-NEXT:    vld1.64 {d20, d21}, [lr:128]!
258; CHECK-NEXT:    vmov r6, r4, d19
259; CHECK-NEXT:    vmov r5, r7, d21
260; CHECK-NEXT:    vld1.64 {d4, d5}, [r9:128]!
261; CHECK-NEXT:    vld1.64 {d6, d7}, [r10:128]!
262; CHECK-NEXT:    vld1.64 {d0, d1}, [r10:128]!
263; CHECK-NEXT:    vld1.64 {d2, d3}, [r9:128]!
264; CHECK-NEXT:    subs r6, r5, r6
265; CHECK-NEXT:    sbcs r4, r7, r4
266; CHECK-NEXT:    vmov r5, r6, d18
267; CHECK-NEXT:    vmov r7, r2, d20
268; CHECK-NEXT:    mov r4, #0
269; CHECK-NEXT:    movlt r4, #1
270; CHECK-NEXT:    cmp r4, #0
271; CHECK-NEXT:    mvnne r4, #0
272; CHECK-NEXT:    vdup.32 d31, r4
273; CHECK-NEXT:    subs r5, r7, r5
274; CHECK-NEXT:    sbcs r2, r2, r6
275; CHECK-NEXT:    vmov r4, r5, d3
276; CHECK-NEXT:    mov r2, #0
277; CHECK-NEXT:    movlt r2, #1
278; CHECK-NEXT:    cmp r2, #0
279; CHECK-NEXT:    mvnne r2, #0
280; CHECK-NEXT:    vdup.32 d30, r2
281; CHECK-NEXT:    vmov r0, r2, d1
282; CHECK-NEXT:    subs r0, r4, r0
283; CHECK-NEXT:    sbcs r0, r5, r2
284; CHECK-NEXT:    vmov r4, r5, d2
285; CHECK-NEXT:    mov r0, #0
286; CHECK-NEXT:    movlt r0, #1
287; CHECK-NEXT:    cmp r0, #0
288; CHECK-NEXT:    mvnne r0, #0
289; CHECK-NEXT:    vdup.32 d9, r0
290; CHECK-NEXT:    vmov r0, r2, d0
291; CHECK-NEXT:    subs r0, r4, r0
292; CHECK-NEXT:    sbcs r0, r5, r2
293; CHECK-NEXT:    vmov r4, r5, d5
294; CHECK-NEXT:    mov r0, #0
295; CHECK-NEXT:    movlt r0, #1
296; CHECK-NEXT:    cmp r0, #0
297; CHECK-NEXT:    mvnne r0, #0
298; CHECK-NEXT:    vdup.32 d8, r0
299; CHECK-NEXT:    vmov r0, r2, d7
300; CHECK-NEXT:    subs r0, r4, r0
301; CHECK-NEXT:    sbcs r0, r5, r2
302; CHECK-NEXT:    vmov r4, r5, d4
303; CHECK-NEXT:    mov r0, #0
304; CHECK-NEXT:    movlt r0, #1
305; CHECK-NEXT:    cmp r0, #0
306; CHECK-NEXT:    mvnne r0, #0
307; CHECK-NEXT:    vdup.32 d11, r0
308; CHECK-NEXT:    vmov r0, r2, d6
309; CHECK-NEXT:    subs r0, r4, r0
310; CHECK-NEXT:    sbcs r0, r5, r2
311; CHECK-NEXT:    vmov r4, r5, d23
312; CHECK-NEXT:    mov r0, #0
313; CHECK-NEXT:    movlt r0, #1
314; CHECK-NEXT:    cmp r0, #0
315; CHECK-NEXT:    mvnne r0, #0
316; CHECK-NEXT:    vdup.32 d10, r0
317; CHECK-NEXT:    vmov r0, r2, d17
318; CHECK-NEXT:    subs r0, r4, r0
319; CHECK-NEXT:    sbcs r0, r5, r2
320; CHECK-NEXT:    vmov r4, r5, d22
321; CHECK-NEXT:    mov r0, #0
322; CHECK-NEXT:    movlt r0, #1
323; CHECK-NEXT:    cmp r0, #0
324; CHECK-NEXT:    mvnne r0, #0
325; CHECK-NEXT:    vdup.32 d25, r0
326; CHECK-NEXT:    vmov r0, r2, d16
327; CHECK-NEXT:    subs r0, r4, r0
328; CHECK-NEXT:    sbcs r0, r5, r2
329; CHECK-NEXT:    mov r0, #0
330; CHECK-NEXT:    movlt r0, #1
331; CHECK-NEXT:    cmp r0, #0
332; CHECK-NEXT:    mvnne r0, #0
333; CHECK-NEXT:    vdup.32 d24, r0
334; CHECK-NEXT:    vorr q13, q12, q12
335; CHECK-NEXT:    vbsl q13, q11, q8
336; CHECK-NEXT:    vld1.64 {d24, d25}, [r9:128]!
337; CHECK-NEXT:    vorr q8, q5, q5
338; CHECK-NEXT:    vld1.64 {d28, d29}, [r10:128]!
339; CHECK-NEXT:    vbsl q8, q2, q3
340; CHECK-NEXT:    vld1.64 {d6, d7}, [r8:128]!
341; CHECK-NEXT:    vld1.64 {d22, d23}, [r8:128]
342; CHECK-NEXT:    vld1.64 {d4, d5}, [lr:128]!
343; CHECK-NEXT:    vbif q10, q9, q15
344; CHECK-NEXT:    vorr q9, q4, q4
345; CHECK-NEXT:    vmov r0, r2, d22
346; CHECK-NEXT:    vbsl q9, q1, q0
347; CHECK-NEXT:    vld1.64 {d30, d31}, [lr:128]
348; CHECK-NEXT:    mov lr, #0
349; CHECK-NEXT:    vmov r7, r5, d30
350; CHECK-NEXT:    vld1.64 {d0, d1}, [r9:128]
351; CHECK-NEXT:    vld1.64 {d2, d3}, [r10:128]
352; CHECK-NEXT:    subs r0, r7, r0
353; CHECK-NEXT:    sbcs r0, r5, r2
354; CHECK-NEXT:    vmov r5, r4, d24
355; CHECK-NEXT:    vmov r0, r7, d28
356; CHECK-NEXT:    movlt lr, #1
357; CHECK-NEXT:    cmp lr, #0
358; CHECK-NEXT:    mvnne lr, #0
359; CHECK-NEXT:    subs r0, r5, r0
360; CHECK-NEXT:    sbcs r0, r4, r7
361; CHECK-NEXT:    vmov r7, r5, d29
362; CHECK-NEXT:    vmov r4, r6, d25
363; CHECK-NEXT:    mov r0, #0
364; CHECK-NEXT:    movlt r0, #1
365; CHECK-NEXT:    cmp r0, #0
366; CHECK-NEXT:    mvnne r0, #0
367; CHECK-NEXT:    subs r7, r4, r7
368; CHECK-NEXT:    mov r4, #0
369; CHECK-NEXT:    sbcs r7, r6, r5
370; CHECK-NEXT:    vmov r5, r1, d31
371; CHECK-NEXT:    vmov r7, r6, d23
372; CHECK-NEXT:    movlt r4, #1
373; CHECK-NEXT:    cmp r4, #0
374; CHECK-NEXT:    mvnne r4, #0
375; CHECK-NEXT:    subs r7, r5, r7
376; CHECK-NEXT:    mov r5, #0
377; CHECK-NEXT:    sbcs r1, r1, r6
378; CHECK-NEXT:    vmov r6, r2, d5
379; CHECK-NEXT:    vmov r1, r7, d7
380; CHECK-NEXT:    movlt r5, #1
381; CHECK-NEXT:    cmp r5, #0
382; CHECK-NEXT:    mvnne r5, #0
383; CHECK-NEXT:    subs r1, r6, r1
384; CHECK-NEXT:    sbcs r1, r2, r7
385; CHECK-NEXT:    vmov r6, r7, d4
386; CHECK-NEXT:    mov r1, #0
387; CHECK-NEXT:    movlt r1, #1
388; CHECK-NEXT:    cmp r1, #0
389; CHECK-NEXT:    mvnne r1, #0
390; CHECK-NEXT:    vdup.32 d9, r1
391; CHECK-NEXT:    vmov r1, r2, d6
392; CHECK-NEXT:    subs r1, r6, r1
393; CHECK-NEXT:    sbcs r1, r7, r2
394; CHECK-NEXT:    vmov r6, r7, d0
395; CHECK-NEXT:    mov r1, #0
396; CHECK-NEXT:    movlt r1, #1
397; CHECK-NEXT:    cmp r1, #0
398; CHECK-NEXT:    mvnne r1, #0
399; CHECK-NEXT:    vdup.32 d8, r1
400; CHECK-NEXT:    vmov r1, r2, d2
401; CHECK-NEXT:    vbif q2, q3, q4
402; CHECK-NEXT:    vdup.32 d7, r5
403; CHECK-NEXT:    vdup.32 d9, r4
404; CHECK-NEXT:    vmov r4, r5, d1
405; CHECK-NEXT:    vdup.32 d8, r0
406; CHECK-NEXT:    mov r0, r3
407; CHECK-NEXT:    vst1.64 {d26, d27}, [r0:128]!
408; CHECK-NEXT:    vbif q12, q14, q4
409; CHECK-NEXT:    vdup.32 d6, lr
410; CHECK-NEXT:    vbit q11, q15, q3
411; CHECK-NEXT:    vst1.64 {d20, d21}, [r0:128]!
412; CHECK-NEXT:    subs r1, r6, r1
413; CHECK-NEXT:    mov r6, #0
414; CHECK-NEXT:    sbcs r1, r7, r2
415; CHECK-NEXT:    vmov r1, r2, d3
416; CHECK-NEXT:    movlt r6, #1
417; CHECK-NEXT:    subs r1, r4, r1
418; CHECK-NEXT:    sbcs r1, r5, r2
419; CHECK-NEXT:    movlt r12, #1
420; CHECK-NEXT:    cmp r12, #0
421; CHECK-NEXT:    mvnne r12, #0
422; CHECK-NEXT:    cmp r6, #0
423; CHECK-NEXT:    vdup.32 d27, r12
424; CHECK-NEXT:    mvnne r6, #0
425; CHECK-NEXT:    vdup.32 d26, r6
426; CHECK-NEXT:    vorr q10, q13, q13
427; CHECK-NEXT:    vbsl q10, q0, q1
428; CHECK-NEXT:    vst1.64 {d4, d5}, [r0:128]!
429; CHECK-NEXT:    vst1.64 {d22, d23}, [r0:128]
430; CHECK-NEXT:    add r0, r3, #64
431; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]!
432; CHECK-NEXT:    vst1.64 {d18, d19}, [r0:128]!
433; CHECK-NEXT:    vst1.64 {d24, d25}, [r0:128]!
434; CHECK-NEXT:    vst1.64 {d20, d21}, [r0:128]
435; CHECK-NEXT:    vpop {d8, d9, d10, d11}
436; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, lr}
437; CHECK-NEXT:    mov pc, lr
; IR under test: load both operands, compare lanes with signed less-than, and
; select %v0 where the comparison holds, %v1 otherwise.
438  %v0 = load %T0_20, %T0_20* %loadaddr
439  %v1 = load %T0_20, %T0_20* %loadaddr2
440  %c = icmp slt %T0_20 %v0, %v1
441; COST: func_blend20
442; COST: cost of 0 {{.*}} icmp
443; COST: cost of 108 {{.*}} select
444  %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
445  store %T0_20 %r, %T0_20* %storeaddr
446  ret void
447}
448