; RUN: opt < %s -passes='print<cost-model>' -mtriple=arm-apple-ios6.0.0 -mcpu=cortex-a8 2>&1 -disable-output | FileCheck %s --check-prefix=COST
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
; Make sure that ARM backend with NEON handles vselect.
;
; Each function below expresses a signed integer max/min as a vector compare
; followed by a select on the compare result. The llc run verifies the emitted
; NEON code; the cost-model run verifies the cost reported for the icmp and
; the select.

; Signed maximum of two <4 x i32> vectors (icmp sgt + select), stored to %m.
; Expected to be emitted as a single vmax.s32 on q registers. The label anchor
; keeps that match scoped to this function's output, in the same way the
; func_blend* functions below are anchored.
; CHECK-LABEL: vmax_v4i32:
define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
; CHECK: vmax.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
  %cmpres = icmp sgt <4 x i32> %a, %b
  %maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %maxres, <4 x i32>* %m
  ret void
}

; Signed minimum of two <16 x i16> vectors loaded from %loadaddr and
; %loadaddr2 (icmp slt + select), stored to %storeaddr.
; Expected code: two vmin.s16. Expected costs: icmp 0, select 4.
; %blend is not referenced by the body.
; NOTE(review): %blend is presumably kept so the argument layout matches the
; other func_blend* tests - confirm before removing.
%T0_10 = type <16 x i16>
%T1_10 = type <16 x i1>
; CHECK-LABEL: func_blend10:
define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
                          %T1_10* %blend, %T0_10* %storeaddr) {
  %v0 = load %T0_10, %T0_10* %loadaddr
  %v1 = load %T0_10, %T0_10* %loadaddr2
  %c = icmp slt %T0_10 %v0, %v1
; CHECK: vmin.s16
; CHECK: vmin.s16
; COST: func_blend10
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select
  %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
  store %T0_10 %r, %T0_10* %storeaddr
  ret void
}

; Signed minimum of two <8 x i32> vectors (icmp slt + select).
; Expected code: two vmin.s32. Expected costs: icmp 0, select 4.
; %blend is not referenced by the body.
%T0_14 = type <8 x i32>
%T1_14 = type <8 x i1>
; CHECK-LABEL: func_blend14:
define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
                          %T1_14* %blend, %T0_14* %storeaddr) {
  %v0 = load %T0_14, %T0_14* %loadaddr
  %v1 = load %T0_14, %T0_14* %loadaddr2
  %c = icmp slt %T0_14 %v0, %v1
; CHECK: vmin.s32
; CHECK: vmin.s32
; COST: func_blend14
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select
  %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
  store %T0_14 %r, %T0_14* %storeaddr
  ret void
}

; Signed minimum of two <16 x i32> vectors (icmp slt + select).
; The llc run only requires that at least two vmin.s32 appear.
; Expected costs: icmp 0, select 8.
; %blend is not referenced by the body.
%T0_15 = type <16 x i32>
%T1_15 = type <16 x i1>
; CHECK-LABEL: func_blend15:
define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
                          %T1_15* %blend, %T0_15* %storeaddr) {
; CHECK: vmin.s32
; CHECK: vmin.s32
  %v0 = load %T0_15, %T0_15* %loadaddr
  %v1 = load %T0_15, %T0_15* %loadaddr2
  %c = icmp slt %T0_15 %v0, %v1
; COST: func_blend15
; COST: cost of 0 {{.*}} icmp
; COST: cost of 8 {{.*}} select
  %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
  store %T0_15 %r, %T0_15* %storeaddr
  ret void
}

; The cost model has been adjusted for the following selects. When their code
; lowering improves, the costs checked here need to be adjusted as well.
;
; For the i64 element cases below, the checked output compares each element
; with a scalar subs/sbcs pair, materialises an all-ones or zero mask per
; element, and blends the vectors with vbit/vbif/vbsl. The full instruction
; sequences are matched exactly, so any scheduling or register-allocation
; change requires regenerating them.

; Signed minimum of two <4 x i64> vectors (icmp slt + select).
; Expected costs: icmp 0, select 21.
; %blend is not referenced by the body, but the checked code stores through
; r3, so dropping the parameter would presumably change the expected output.
%T0_18 = type <4 x i64>
%T1_18 = type <4 x i1>
define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
                          %T1_18* %blend, %T0_18* %storeaddr) {
; CHECK-LABEL: func_blend18:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]!
; CHECK-NEXT:    vld1.64 {d22, d23}, [r0:128]!
; CHECK-NEXT:    vmov r4, r6, d16
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0:128]
; CHECK-NEXT:    vmov lr, r12, d18
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    vmov r2, r1, d20
; CHECK-NEXT:    subs r2, r2, lr
; CHECK-NEXT:    vmov r7, lr, d17
; CHECK-NEXT:    vmov r2, r5, d22
; CHECK-NEXT:    sbcs r1, r1, r12
; CHECK-NEXT:    mov r1, #0
; CHECK-NEXT:    movlt r1, #1
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    mvnne r1, #0
; CHECK-NEXT:    subs r2, r2, r4
; CHECK-NEXT:    sbcs r6, r5, r6
; CHECK-NEXT:    vmov r2, r12, d19
; CHECK-NEXT:    vmov r5, r4, d21
; CHECK-NEXT:    mov r6, #0
; CHECK-NEXT:    movlt r6, #1
; CHECK-NEXT:    cmp r6, #0
; CHECK-NEXT:    mvnne r6, #0
; CHECK-NEXT:    subs r2, r5, r2
; CHECK-NEXT:    sbcs r4, r4, r12
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    vmov r4, r5, d23
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    subs r7, r4, r7
; CHECK-NEXT:    sbcs r7, r5, lr
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    vdup.32 d25, r0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d24, r6
; CHECK-NEXT:    vdup.32 d27, r2
; CHECK-NEXT:    vbit q8, q11, q12
; CHECK-NEXT:    vdup.32 d26, r1
; CHECK-NEXT:    vbit q9, q10, q13
; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %v0 = load %T0_18, %T0_18* %loadaddr
  %v1 = load %T0_18, %T0_18* %loadaddr2
  %c = icmp slt %T0_18 %v0, %v1
; COST: func_blend18
; COST: cost of 0 {{.*}} icmp
; COST: cost of 21 {{.*}} select
  %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
  store %T0_18 %r, %T0_18* %storeaddr
  ret void
}

; Signed minimum of two <8 x i64> vectors (icmp slt + select).
; Expected costs: icmp 0, select 54.
; %blend is not referenced by the body, but the checked code stores through
; r3, so dropping the parameter would presumably change the expected output.
%T0_19 = type <8 x i64>
%T1_19 = type <8 x i1>
define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
                          %T1_19* %blend, %T0_19* %storeaddr) {
; CHECK-LABEL: func_blend19:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vld1.64 {d28, d29}, [r1:128]!
; CHECK-NEXT:    mov lr, #0
; CHECK-NEXT:    vld1.64 {d30, d31}, [r0:128]!
; CHECK-NEXT:    vld1.64 {d20, d21}, [r1:128]!
; CHECK-NEXT:    vld1.64 {d24, d25}, [r0:128]!
; CHECK-NEXT:    vld1.64 {d22, d23}, [r1:128]!
; CHECK-NEXT:    vld1.64 {d26, d27}, [r0:128]!
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0:128]
; CHECK-NEXT:    vmov r0, r12, d16
; CHECK-NEXT:    vmov r1, r2, d18
; CHECK-NEXT:    subs r0, r1, r0
; CHECK-NEXT:    vmov r1, r4, d25
; CHECK-NEXT:    sbcs r0, r2, r12
; CHECK-NEXT:    mov r12, #0
; CHECK-NEXT:    vmov r2, r0, d21
; CHECK-NEXT:    movlt r12, #1
; CHECK-NEXT:    cmp r12, #0
; CHECK-NEXT:    mvnne r12, #0
; CHECK-NEXT:    subs r1, r1, r2
; CHECK-NEXT:    sbcs r0, r4, r0
; CHECK-NEXT:    vmov r2, r4, d26
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d1, r0
; CHECK-NEXT:    vmov r0, r1, d22
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    sbcs r0, r4, r1
; CHECK-NEXT:    vmov r4, r5, d31
; CHECK-NEXT:    vmov r0, r1, d29
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r1
; CHECK-NEXT:    vmov r4, r5, d30
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d3, r0
; CHECK-NEXT:    vmov r0, r1, d28
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r1
; CHECK-NEXT:    vmov r4, r5, d24
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d2, r0
; CHECK-NEXT:    vmov r0, r1, d20
; CHECK-NEXT:    vbit q14, q15, q1
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r1
; CHECK-NEXT:    vmov r1, r4, d17
; CHECK-NEXT:    vmov r5, r6, d19
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d0, r0
; CHECK-NEXT:    vbit q10, q12, q0
; CHECK-NEXT:    subs r1, r5, r1
; CHECK-NEXT:    sbcs r1, r6, r4
; CHECK-NEXT:    vmov r4, r5, d27
; CHECK-NEXT:    vmov r0, r1, d23
; CHECK-NEXT:    mov r6, #0
; CHECK-NEXT:    movlt r6, #1
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r1
; CHECK-NEXT:    movlt lr, #1
; CHECK-NEXT:    cmp lr, #0
; CHECK-NEXT:    mvnne lr, #0
; CHECK-NEXT:    cmp r6, #0
; CHECK-NEXT:    vdup.32 d31, lr
; CHECK-NEXT:    mvnne r6, #0
; CHECK-NEXT:    vdup.32 d30, r2
; CHECK-NEXT:    vdup.32 d3, r6
; CHECK-NEXT:    vbit q11, q13, q15
; CHECK-NEXT:    vdup.32 d2, r12
; CHECK-NEXT:    vst1.64 {d28, d29}, [r3:128]!
; CHECK-NEXT:    vbit q8, q9, q1
; CHECK-NEXT:    vst1.64 {d20, d21}, [r3:128]!
; CHECK-NEXT:    vst1.64 {d22, d23}, [r3:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128]
; CHECK-NEXT:    pop {r4, r5, r6, lr}
; CHECK-NEXT:    mov pc, lr
  %v0 = load %T0_19, %T0_19* %loadaddr
  %v1 = load %T0_19, %T0_19* %loadaddr2
  %c = icmp slt %T0_19 %v0, %v1
; COST: func_blend19
; COST: cost of 0 {{.*}} icmp
; COST: cost of 54 {{.*}} select
  %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
  store %T0_19 %r, %T0_19* %storeaddr
  ret void
}

; Signed minimum of two <16 x i64> vectors (icmp slt + select).
; Expected costs: icmp 0, select 108.
; %blend is not referenced by the body, but the checked code derives its
; store addresses from r3, so dropping the parameter would presumably change
; the expected output.
%T0_20 = type <16 x i64>
%T1_20 = type <16 x i1>
define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
                          %T1_20* %blend, %T0_20* %storeaddr) {
; CHECK-LABEL: func_blend20:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    mov r8, r1
; CHECK-NEXT:    mov lr, r0
; CHECK-NEXT:    vld1.64 {d16, d17}, [r8:128]!
; CHECK-NEXT:    add r9, r0, #64
; CHECK-NEXT:    add r10, r1, #64
; CHECK-NEXT:    mov r12, #0
; CHECK-NEXT:    vld1.64 {d22, d23}, [lr:128]!
; CHECK-NEXT:    vld1.64 {d18, d19}, [r8:128]!
; CHECK-NEXT:    vld1.64 {d20, d21}, [lr:128]!
; CHECK-NEXT:    vmov r6, r4, d19
; CHECK-NEXT:    vmov r5, r7, d21
; CHECK-NEXT:    vld1.64 {d4, d5}, [r9:128]!
; CHECK-NEXT:    vld1.64 {d6, d7}, [r10:128]!
; CHECK-NEXT:    vld1.64 {d0, d1}, [r10:128]!
; CHECK-NEXT:    vld1.64 {d2, d3}, [r9:128]!
; CHECK-NEXT:    subs r6, r5, r6
; CHECK-NEXT:    sbcs r4, r7, r4
; CHECK-NEXT:    vmov r5, r6, d18
; CHECK-NEXT:    vmov r7, r2, d20
; CHECK-NEXT:    mov r4, #0
; CHECK-NEXT:    movlt r4, #1
; CHECK-NEXT:    cmp r4, #0
; CHECK-NEXT:    mvnne r4, #0
; CHECK-NEXT:    vdup.32 d31, r4
; CHECK-NEXT:    subs r5, r7, r5
; CHECK-NEXT:    sbcs r2, r2, r6
; CHECK-NEXT:    vmov r4, r5, d3
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d30, r2
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r2
; CHECK-NEXT:    vmov r4, r5, d2
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d9, r0
; CHECK-NEXT:    vmov r0, r2, d0
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r2
; CHECK-NEXT:    vmov r4, r5, d5
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d8, r0
; CHECK-NEXT:    vmov r0, r2, d7
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r2
; CHECK-NEXT:    vmov r4, r5, d4
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d11, r0
; CHECK-NEXT:    vmov r0, r2, d6
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r2
; CHECK-NEXT:    vmov r4, r5, d23
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d10, r0
; CHECK-NEXT:    vmov r0, r2, d17
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r2
; CHECK-NEXT:    vmov r4, r5, d22
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d25, r0
; CHECK-NEXT:    vmov r0, r2, d16
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r2
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d24, r0
; CHECK-NEXT:    vorr q13, q12, q12
; CHECK-NEXT:    vbsl q13, q11, q8
; CHECK-NEXT:    vld1.64 {d24, d25}, [r9:128]!
; CHECK-NEXT:    vorr q8, q5, q5
; CHECK-NEXT:    vld1.64 {d28, d29}, [r10:128]!
; CHECK-NEXT:    vbsl q8, q2, q3
; CHECK-NEXT:    vld1.64 {d6, d7}, [r8:128]!
; CHECK-NEXT:    vld1.64 {d22, d23}, [r8:128]
; CHECK-NEXT:    vld1.64 {d4, d5}, [lr:128]!
; CHECK-NEXT:    vbif q10, q9, q15
; CHECK-NEXT:    vorr q9, q4, q4
; CHECK-NEXT:    vmov r0, r2, d22
; CHECK-NEXT:    vbsl q9, q1, q0
; CHECK-NEXT:    vld1.64 {d30, d31}, [lr:128]
; CHECK-NEXT:    mov lr, #0
; CHECK-NEXT:    vmov r7, r5, d30
; CHECK-NEXT:    vld1.64 {d0, d1}, [r9:128]
; CHECK-NEXT:    vld1.64 {d2, d3}, [r10:128]
; CHECK-NEXT:    subs r0, r7, r0
; CHECK-NEXT:    sbcs r0, r5, r2
; CHECK-NEXT:    vmov r5, r4, d24
; CHECK-NEXT:    vmov r0, r7, d28
; CHECK-NEXT:    movlt lr, #1
; CHECK-NEXT:    cmp lr, #0
; CHECK-NEXT:    mvnne lr, #0
; CHECK-NEXT:    subs r0, r5, r0
; CHECK-NEXT:    sbcs r0, r4, r7
; CHECK-NEXT:    vmov r7, r5, d29
; CHECK-NEXT:    vmov r4, r6, d25
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    subs r7, r4, r7
; CHECK-NEXT:    mov r4, #0
; CHECK-NEXT:    sbcs r7, r6, r5
; CHECK-NEXT:    vmov r5, r1, d31
; CHECK-NEXT:    vmov r7, r6, d23
; CHECK-NEXT:    movlt r4, #1
; CHECK-NEXT:    cmp r4, #0
; CHECK-NEXT:    mvnne r4, #0
; CHECK-NEXT:    subs r7, r5, r7
; CHECK-NEXT:    mov r5, #0
; CHECK-NEXT:    sbcs r1, r1, r6
; CHECK-NEXT:    vmov r6, r2, d5
; CHECK-NEXT:    vmov r1, r7, d7
; CHECK-NEXT:    movlt r5, #1
; CHECK-NEXT:    cmp r5, #0
; CHECK-NEXT:    mvnne r5, #0
; CHECK-NEXT:    subs r1, r6, r1
; CHECK-NEXT:    sbcs r1, r2, r7
; CHECK-NEXT:    vmov r6, r7, d4
; CHECK-NEXT:    mov r1, #0
; CHECK-NEXT:    movlt r1, #1
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    mvnne r1, #0
; CHECK-NEXT:    vdup.32 d9, r1
; CHECK-NEXT:    vmov r1, r2, d6
; CHECK-NEXT:    subs r1, r6, r1
; CHECK-NEXT:    sbcs r1, r7, r2
; CHECK-NEXT:    vmov r6, r7, d0
; CHECK-NEXT:    mov r1, #0
; CHECK-NEXT:    movlt r1, #1
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    mvnne r1, #0
; CHECK-NEXT:    vdup.32 d8, r1
; CHECK-NEXT:    vmov r1, r2, d2
; CHECK-NEXT:    vbif q2, q3, q4
; CHECK-NEXT:    vdup.32 d7, r5
; CHECK-NEXT:    vdup.32 d9, r4
; CHECK-NEXT:    vmov r4, r5, d1
; CHECK-NEXT:    vdup.32 d8, r0
; CHECK-NEXT:    mov r0, r3
; CHECK-NEXT:    vst1.64 {d26, d27}, [r0:128]!
; CHECK-NEXT:    vbif q12, q14, q4
; CHECK-NEXT:    vdup.32 d6, lr
; CHECK-NEXT:    vbit q11, q15, q3
; CHECK-NEXT:    vst1.64 {d20, d21}, [r0:128]!
; CHECK-NEXT:    subs r1, r6, r1
; CHECK-NEXT:    mov r6, #0
; CHECK-NEXT:    sbcs r1, r7, r2
; CHECK-NEXT:    vmov r1, r2, d3
; CHECK-NEXT:    movlt r6, #1
; CHECK-NEXT:    subs r1, r4, r1
; CHECK-NEXT:    sbcs r1, r5, r2
; CHECK-NEXT:    movlt r12, #1
; CHECK-NEXT:    cmp r12, #0
; CHECK-NEXT:    mvnne r12, #0
; CHECK-NEXT:    cmp r6, #0
; CHECK-NEXT:    vdup.32 d27, r12
; CHECK-NEXT:    mvnne r6, #0
; CHECK-NEXT:    vdup.32 d26, r6
; CHECK-NEXT:    vorr q10, q13, q13
; CHECK-NEXT:    vbsl q10, q0, q1
; CHECK-NEXT:    vst1.64 {d4, d5}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d22, d23}, [r0:128]
; CHECK-NEXT:    add r0, r3, #64
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d24, d25}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d20, d21}, [r0:128]
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT:    mov pc, lr
  %v0 = load %T0_20, %T0_20* %loadaddr
  %v1 = load %T0_20, %T0_20* %loadaddr2
  %c = icmp slt %T0_20 %v0, %v1
; COST: func_blend20
; COST: cost of 0 {{.*}} icmp
; COST: cost of 108 {{.*}} select
  %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
  store %T0_20 %r, %T0_20* %storeaddr
  ret void
}