; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE

; Overview
; --------
; Every function below loads one 32-bit scalar (i32 or float) from memory and
; inserts it into element 0 of an incoming vector argument. The four run lines
; above compile the file for pwr9 and pwr8, each in little-endian
; (powerpc64le) and big-endian (powerpc64) mode, so each function carries four
; groups of expected assembly.
;
; The functions differ only in how the load address is formed (base pointer,
; constant element offset 1, or a sign-extended variable index), in the order
; of the pointer/vector arguments, and in the vector type (<4 x i32>,
; <4 x float>, <2 x float>).
;
; The assertion lines are tool-generated (see the NOTE on the first line).
; After a codegen change, regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; s2v_test1: load an i32 directly from %int32 and insert it into element 0 of
; the <4 x i32> %vec.
; Expected code: pwr9 uses lwz + mtfprwz + xxinsertw (immediate 12 on
; little-endian, 0 on big-endian); pwr8 little-endian uses vperm with a
; control vector loaded through a .LCPI TOC entry; pwr8 big-endian uses a
; vmrghw / xxsldwi sequence.
; Function Attrs: norecurse nounwind readonly
define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P9LE-LABEL: s2v_test1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lwz r3, 0(r3)
; P9LE-NEXT: mtfprwz f0, r3
; P9LE-NEXT: xxinsertw v2, vs0, 12
; P9LE-NEXT: blr
;
; P9BE-LABEL: s2v_test1:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lwz r3, 0(r3)
; P9BE-NEXT: mtfprwz f0, r3
; P9BE-NEXT: xxinsertw v2, vs0, 0
; P9BE-NEXT: blr
;
; P8LE-LABEL: s2v_test1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
; P8LE-NEXT: lxsiwzx v4, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI0_0@toc@l
; P8LE-NEXT: lvx v3, 0, r4
; P8LE-NEXT: vperm v2, v2, v4, v3
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test1:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lxsiwzx v3, 0, r3
; P8BE-NEXT: vmrghw v4, v2, v3
; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
; P8BE-NEXT: blr
entry:
  %0 = load i32, i32* %int32, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; s2v_test2: same as s2v_test1, but the i32 is loaded from element 1 of
; %int32 (constant getelementptr index 1). The pwr9 expectations fold this
; into a displacement of 4 on lwz; the pwr8 expectations use a separate
; addi r3, r3, 4 before the indexed load.
; Function Attrs: norecurse nounwind readonly
define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P9LE-LABEL: s2v_test2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lwz r3, 4(r3)
; P9LE-NEXT: mtfprwz f0, r3
; P9LE-NEXT: xxinsertw v2, vs0, 12
; P9LE-NEXT: blr
;
; P9BE-LABEL: s2v_test2:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lwz r3, 4(r3)
; P9BE-NEXT: mtfprwz f0, r3
; P9BE-NEXT: xxinsertw v2, vs0, 0
; P9BE-NEXT: blr
;
; P8LE-LABEL: s2v_test2:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: addi r4, r4, .LCPI1_0@toc@l
; P8LE-NEXT: lxsiwzx v4, 0, r3
; P8LE-NEXT: lvx v3, 0, r4
; P8LE-NEXT: vperm v2, v2, v4, v3
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test2:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: addi r3, r3, 4
; P8BE-NEXT: lxsiwzx v3, 0, r3
; P8BE-NEXT: vmrghw v4, v2, v3
; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
; P8BE-NEXT: blr
entry:
  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
  %0 = load i32, i32* %arrayidx, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; s2v_test3: load the i32 from %int32[%Idx], where %Idx is a signext i32
; argument, and insert it into element 0 of %vec. All four expectations scale
; the index with sldi ..., 2 and use an indexed load (lwzx or lxsiwzx).
; Function Attrs: norecurse nounwind readonly
define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) {
; P9LE-LABEL: s2v_test3:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 2
; P9LE-NEXT: lwzx r3, r3, r4
; P9LE-NEXT: mtfprwz f0, r3
; P9LE-NEXT: xxinsertw v2, vs0, 12
; P9LE-NEXT: blr
;
; P9BE-LABEL: s2v_test3:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: sldi r4, r7, 2
; P9BE-NEXT: lwzx r3, r3, r4
; P9BE-NEXT: mtfprwz f0, r3
; P9BE-NEXT: xxinsertw v2, vs0, 0
; P9BE-NEXT: blr
;
; P8LE-LABEL: s2v_test3:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; P8LE-NEXT: sldi r5, r7, 2
; P8LE-NEXT: addi r4, r4, .LCPI2_0@toc@l
; P8LE-NEXT: lxsiwzx v3, r3, r5
; P8LE-NEXT: lvx v4, 0, r4
; P8LE-NEXT: vperm v2, v2, v3, v4
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test3:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: sldi r4, r7, 2
; P8BE-NEXT: lxsiwzx v3, r3, r4
; P8BE-NEXT: vmrghw v4, v2, v3
; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
; P8BE-NEXT: blr
entry:
  ; Widen the 32-bit index to i64 so it can be used as the GEP offset.
  %idxprom = sext i32 %Idx to i64
  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
  %0 = load i32, i32* %arrayidx, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; s2v_test4: the IR body is identical to s2v_test2 (load from element 1 of
; %int32, insert into element 0 of %vec), and so are the expected sequences
; apart from the function label and the .LCPI3_0 constant label.
; NOTE(review): presumably kept as a separate case for parity with sibling
; test files -- confirm before removing it as a duplicate.
; Function Attrs: norecurse nounwind readonly
define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P9LE-LABEL: s2v_test4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lwz r3, 4(r3)
; P9LE-NEXT: mtfprwz f0, r3
; P9LE-NEXT: xxinsertw v2, vs0, 12
; P9LE-NEXT: blr
;
; P9BE-LABEL: s2v_test4:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lwz r3, 4(r3)
; P9BE-NEXT: mtfprwz f0, r3
; P9BE-NEXT: xxinsertw v2, vs0, 0
; P9BE-NEXT: blr
;
; P8LE-LABEL: s2v_test4:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: addi r4, r4, .LCPI3_0@toc@l
; P8LE-NEXT: lxsiwzx v4, 0, r3
; P8LE-NEXT: lvx v3, 0, r4
; P8LE-NEXT: vperm v2, v2, v4, v3
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test4:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: addi r3, r3, 4
; P8BE-NEXT: lxsiwzx v3, 0, r3
; P8BE-NEXT: vmrghw v4, v2, v3
; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
; P8BE-NEXT: blr
entry:
  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
  %0 = load i32, i32* %arrayidx, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; s2v_test5: same operation as s2v_test1 with the argument order swapped: the
; vector comes first and the pointer (%ptr1) second. The expectations
; accordingly read the pointer from r5 instead of r3.
; Function Attrs: norecurse nounwind readonly
define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lwz r3, 0(r5)
; P9LE-NEXT: mtfprwz f0, r3
; P9LE-NEXT: xxinsertw v2, vs0, 12
; P9LE-NEXT: blr
;
; P9BE-LABEL: s2v_test5:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lwz r3, 0(r5)
; P9BE-NEXT: mtfprwz f0, r3
; P9BE-NEXT: xxinsertw v2, vs0, 0
; P9BE-NEXT: blr
;
; P8LE-LABEL: s2v_test5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; P8LE-NEXT: lxsiwzx v4, 0, r5
; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l
; P8LE-NEXT: lvx v3, 0, r3
; P8LE-NEXT: vperm v2, v2, v4, v3
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test5:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lxsiwzx v3, 0, r5
; P8BE-NEXT: vmrghw v4, v2, v3
; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
; P8BE-NEXT: blr
entry:
  %0 = load i32, i32* %ptr1, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; s2v_test_f1: floating-point counterpart of s2v_test1: load a float from the
; pointer and insert it into element 0 of the <4 x float> %vec.
; Note that the pointer parameter is named %f64 although its type is float*.
; Expected code: pwr9 uses lfs + xscvdpspn + xxinsertw (immediate 12 on
; little-endian, 0 on big-endian); the pwr8 expectations match the integer
; case (lxsiwzx followed by vperm or vmrghw / xxsldwi).
; Function Attrs: norecurse nounwind readonly
define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) {
; P9LE-LABEL: s2v_test_f1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfs f0, 0(r3)
; P9LE-NEXT: xscvdpspn vs0, f0
; P9LE-NEXT: xxinsertw v2, vs0, 12
; P9LE-NEXT: blr
;
; P9BE-LABEL: s2v_test_f1:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfs f0, 0(r3)
; P9BE-NEXT: xscvdpspn vs0, f0
; P9BE-NEXT: xxinsertw v2, vs0, 0
; P9BE-NEXT: blr
;
; P8LE-LABEL: s2v_test_f1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
; P8LE-NEXT: lxsiwzx v4, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI5_0@toc@l
; P8LE-NEXT: lvx v3, 0, r4
; P8LE-NEXT: vperm v2, v2, v4, v3
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test_f1:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lxsiwzx v3, 0, r3
; P8BE-NEXT: vmrghw v4, v2, v3
; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
; P8BE-NEXT: blr
entry:
  %0 = load float, float* %f64, align 4
  %vecins = insertelement <4 x float> %vec, float %0, i32 0
  ret <4 x float> %vecins
}

; s2v_test_f2: load a float from element 1 of the pointer and insert it into
; element 0 of the <2 x float> %vec.
; Expected code: little-endian targets use vmrglw + lxsiwzx + vmrghw;
; big-endian targets use lxsiwzx + vmrgow.
; NOTE(review): the load is annotated align 8 although it reads a 4-byte
; float at element offset 1 -- presumably inherited from the reduced source;
; confirm it is intentional before changing it, since the expectations were
; generated with this alignment.
; Function Attrs: norecurse nounwind readonly
define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec) {
; P9LE-LABEL: s2v_test_f2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: vmrglw v2, v2, v2
; P9LE-NEXT: lxsiwzx v3, 0, r3
; P9LE-NEXT: vmrghw v2, v2, v3
; P9LE-NEXT: blr
;
; P9BE-LABEL: s2v_test_f2:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: addi r3, r3, 4
; P9BE-NEXT: lxsiwzx v3, 0, r3
; P9BE-NEXT: vmrgow v2, v3, v2
; P9BE-NEXT: blr
;
; P8LE-LABEL: s2v_test_f2:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: vmrglw v2, v2, v2
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: lxsiwzx v3, 0, r3
; P8LE-NEXT: vmrghw v2, v2, v3
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test_f2:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: addi r3, r3, 4
; P8BE-NEXT: lxsiwzx v3, 0, r3
; P8BE-NEXT: vmrgow v2, v3, v2
; P8BE-NEXT: blr
entry:
  %arrayidx = getelementptr inbounds float, float* %f64, i64 1
  %0 = load float, float* %arrayidx, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}

; s2v_test_f3: load a float from the pointer indexed by the signext i32 %Idx
; and insert it into element 0 of the <2 x float> %vec. All expectations scale
; the index with sldi ..., 2 and use the indexed form of lxsiwzx.
; Function Attrs: norecurse nounwind readonly
define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx) {
; P9LE-LABEL: s2v_test_f3:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 2
; P9LE-NEXT: vmrglw v2, v2, v2
; P9LE-NEXT: lxsiwzx v3, r3, r4
; P9LE-NEXT: vmrghw v2, v2, v3
; P9LE-NEXT: blr
;
; P9BE-LABEL: s2v_test_f3:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: sldi r4, r7, 2
; P9BE-NEXT: lxsiwzx v3, r3, r4
; P9BE-NEXT: vmrgow v2, v3, v2
; P9BE-NEXT: blr
;
; P8LE-LABEL: s2v_test_f3:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: vmrglw v2, v2, v2
; P8LE-NEXT: sldi r4, r7, 2
; P8LE-NEXT: lxsiwzx v3, r3, r4
; P8LE-NEXT: vmrghw v2, v2, v3
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test_f3:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: sldi r4, r7, 2
; P8BE-NEXT: lxsiwzx v3, r3, r4
; P8BE-NEXT: vmrgow v2, v3, v2
; P8BE-NEXT: blr
entry:
  ; Widen the 32-bit index to i64 so it can be used as the GEP offset.
  %idxprom = sext i32 %Idx to i64
  %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom
  %0 = load float, float* %arrayidx, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}

; s2v_test_f4: the IR body is identical to s2v_test_f2 (load from element 1,
; insert into element 0 of the <2 x float> %vec), and the expected sequences
; match apart from the function label.
; NOTE(review): presumably kept as a separate case for parity with sibling
; test files -- confirm before removing it as a duplicate.
; Function Attrs: norecurse nounwind readonly
define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec) {
; P9LE-LABEL: s2v_test_f4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: vmrglw v2, v2, v2
; P9LE-NEXT: lxsiwzx v3, 0, r3
; P9LE-NEXT: vmrghw v2, v2, v3
; P9LE-NEXT: blr
;
; P9BE-LABEL: s2v_test_f4:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: addi r3, r3, 4
; P9BE-NEXT: lxsiwzx v3, 0, r3
; P9BE-NEXT: vmrgow v2, v3, v2
; P9BE-NEXT: blr
;
; P8LE-LABEL: s2v_test_f4:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: vmrglw v2, v2, v2
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: lxsiwzx v3, 0, r3
; P8LE-NEXT: vmrghw v2, v2, v3
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test_f4:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: addi r3, r3, 4
; P8BE-NEXT: lxsiwzx v3, 0, r3
; P8BE-NEXT: vmrgow v2, v3, v2
; P8BE-NEXT: blr
entry:
  %arrayidx = getelementptr inbounds float, float* %f64, i64 1
  %0 = load float, float* %arrayidx, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}

; s2v_test_f5: load a float directly from %ptr1 and insert it into element 0
; of the <2 x float> %vec, with the vector passed as the first argument and
; the pointer second. The expectations read the pointer from r5.
; Function Attrs: norecurse nounwind readonly
define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test_f5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lxsiwzx v3, 0, r5
; P9LE-NEXT: vmrglw v2, v2, v2
; P9LE-NEXT: vmrghw v2, v2, v3
; P9LE-NEXT: blr
;
; P9BE-LABEL: s2v_test_f5:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lxsiwzx v3, 0, r5
; P9BE-NEXT: vmrgow v2, v3, v2
; P9BE-NEXT: blr
;
; P8LE-LABEL: s2v_test_f5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: vmrglw v2, v2, v2
; P8LE-NEXT: lxsiwzx v3, 0, r5
; P8LE-NEXT: vmrghw v2, v2, v3
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test_f5:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lxsiwzx v3, 0, r5
; P8BE-NEXT: vmrgow v2, v3, v2
; P8BE-NEXT: blr
entry:
  %0 = load float, float* %ptr1, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}