1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s 4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s 5 6define float @dyn_extract_v8f32_const_s_v(i32 %sel) { 7; GCN-LABEL: dyn_extract_v8f32_const_s_v: 8; GCN: ; %bb.0: ; %entry 9; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 11; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000 12; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, 2.0, vcc 13; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 14; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc 15; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 16; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 17; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc 18; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 19; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 20; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 21; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 22; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000 23; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 24; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 25; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000 26; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 27; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 28; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc 29; GCN-NEXT: s_setpc_b64 s[30:31] 30; 31; GFX10-LABEL: dyn_extract_v8f32_const_s_v: 32; GFX10: ; %bb.0: ; %entry 33; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 34; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 35; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 36; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo 37; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 38; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo 39; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 40; GFX10-NEXT: 
v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo 41; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 42; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo 43; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 44; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo 45; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 46; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo 47; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 48; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo 49; GFX10-NEXT: s_setpc_b64 s[30:31] 50entry: 51 %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel 52 ret float %ext 53} 54 55define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) { 56; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s: 57; GPRIDX: ; %bb.0: ; %entry 58; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1 59; GPRIDX-NEXT: s_cselect_b32 s0, 2.0, 1.0 60; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2 61; GPRIDX-NEXT: s_cselect_b32 s0, 0x40400000, s0 62; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3 63; GPRIDX-NEXT: s_cselect_b32 s0, 4.0, s0 64; GPRIDX-NEXT: s_cmp_eq_u32 s2, 4 65; GPRIDX-NEXT: s_cselect_b32 s0, 0x40a00000, s0 66; GPRIDX-NEXT: s_cmp_eq_u32 s2, 5 67; GPRIDX-NEXT: s_cselect_b32 s0, 0x40c00000, s0 68; GPRIDX-NEXT: s_cmp_eq_u32 s2, 6 69; GPRIDX-NEXT: s_cselect_b32 s0, 0x40e00000, s0 70; GPRIDX-NEXT: s_cmp_eq_u32 s2, 7 71; GPRIDX-NEXT: s_cselect_b32 s0, 0x41000000, s0 72; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 73; GPRIDX-NEXT: ; return to shader part epilog 74; 75; MOVREL-LABEL: dyn_extract_v8f32_const_s_s: 76; MOVREL: ; %bb.0: ; %entry 77; MOVREL-NEXT: s_mov_b32 s4, 1.0 78; MOVREL-NEXT: s_mov_b32 m0, s2 79; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 80; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 81; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 82; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 83; MOVREL-NEXT: s_mov_b32 s7, 4.0 84; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 85; MOVREL-NEXT: s_mov_b32 s5, 2.0 86; MOVREL-NEXT: s_movrels_b32 s0, s4 87; MOVREL-NEXT: 
v_mov_b32_e32 v0, s0 88; MOVREL-NEXT: ; return to shader part epilog 89; 90; GFX10-LABEL: dyn_extract_v8f32_const_s_s: 91; GFX10: ; %bb.0: ; %entry 92; GFX10-NEXT: s_mov_b32 s4, 1.0 93; GFX10-NEXT: s_mov_b32 m0, s2 94; GFX10-NEXT: s_mov_b32 s11, 0x41000000 95; GFX10-NEXT: s_mov_b32 s10, 0x40e00000 96; GFX10-NEXT: s_mov_b32 s9, 0x40c00000 97; GFX10-NEXT: s_mov_b32 s8, 0x40a00000 98; GFX10-NEXT: s_mov_b32 s7, 4.0 99; GFX10-NEXT: s_mov_b32 s6, 0x40400000 100; GFX10-NEXT: s_mov_b32 s5, 2.0 101; GFX10-NEXT: s_movrels_b32 s0, s4 102; GFX10-NEXT: v_mov_b32_e32 v0, s0 103; GFX10-NEXT: ; return to shader part epilog 104entry: 105 %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel 106 ret float %ext 107} 108 109define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) { 110; GCN-LABEL: dyn_extract_v8f32_s_v: 111; GCN: ; %bb.0: ; %entry 112; GCN-NEXT: s_mov_b32 s0, s2 113; GCN-NEXT: s_mov_b32 s1, s3 114; GCN-NEXT: s_mov_b32 s2, s4 115; GCN-NEXT: v_mov_b32_e32 v1, s0 116; GCN-NEXT: v_mov_b32_e32 v2, s1 117; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 118; GCN-NEXT: s_mov_b32 s3, s5 119; GCN-NEXT: v_mov_b32_e32 v3, s2 120; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 121; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 122; GCN-NEXT: v_mov_b32_e32 v4, s3 123; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 124; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 125; GCN-NEXT: v_mov_b32_e32 v5, s6 126; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 127; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 128; GCN-NEXT: v_mov_b32_e32 v6, s7 129; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 130; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 131; GCN-NEXT: v_mov_b32_e32 v7, s8 132; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 133; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 134; GCN-NEXT: v_mov_b32_e32 v8, s9 135; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 136; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 137; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v8, 
vcc 138; GCN-NEXT: ; return to shader part epilog 139; 140; GFX10-LABEL: dyn_extract_v8f32_s_v: 141; GFX10: ; %bb.0: ; %entry 142; GFX10-NEXT: s_mov_b32 s1, s3 143; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 144; GFX10-NEXT: v_mov_b32_e32 v1, s1 145; GFX10-NEXT: s_mov_b32 s0, s2 146; GFX10-NEXT: s_mov_b32 s2, s4 147; GFX10-NEXT: s_mov_b32 s3, s5 148; GFX10-NEXT: s_mov_b32 s4, s6 149; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 150; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 151; GFX10-NEXT: s_mov_b32 s5, s7 152; GFX10-NEXT: s_mov_b32 s6, s8 153; GFX10-NEXT: s_mov_b32 s7, s9 154; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo 155; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 156; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo 157; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 158; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 159; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 160; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo 161; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 162; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 163; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 164; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s7, vcc_lo 165; GFX10-NEXT: ; return to shader part epilog 166entry: 167 %ext = extractelement <8 x float> %vec, i32 %sel 168 ret float %ext 169} 170 171define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) { 172; GCN-LABEL: dyn_extract_v8f32_v_v: 173; GCN: ; %bb.0: ; %entry 174; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 175; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 176; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 177; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 178; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 179; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 180; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 181; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 182; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 183; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 184; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 185; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 186; GCN-NEXT: 
v_cndmask_b32_e32 v0, v0, v6, vcc 187; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 188; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 189; GCN-NEXT: s_setpc_b64 s[30:31] 190; 191; GFX10-LABEL: dyn_extract_v8f32_v_v: 192; GFX10: ; %bb.0: ; %entry 193; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 194; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 195; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 196; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 197; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 198; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 199; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 200; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 201; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 202; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 203; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 204; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 205; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 206; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 207; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8 208; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 209; GFX10-NEXT: s_setpc_b64 s[30:31] 210entry: 211 %ext = extractelement <8 x float> %vec, i32 %sel 212 ret float %ext 213} 214 215define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) { 216; GPRIDX-LABEL: dyn_extract_v8f32_v_s: 217; GPRIDX: ; %bb.0: ; %entry 218; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 219; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 220; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 221; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 222; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 223; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 224; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 225; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 226; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 227; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 228; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 229; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 230; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7 231; GPRIDX-NEXT: 
v_cndmask_b32_e32 v0, v0, v7, vcc 232; GPRIDX-NEXT: ; return to shader part epilog 233; 234; MOVREL-LABEL: dyn_extract_v8f32_v_s: 235; MOVREL: ; %bb.0: ; %entry 236; MOVREL-NEXT: s_mov_b32 m0, s2 237; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 238; MOVREL-NEXT: ; return to shader part epilog 239; 240; GFX10-LABEL: dyn_extract_v8f32_v_s: 241; GFX10: ; %bb.0: ; %entry 242; GFX10-NEXT: s_mov_b32 m0, s2 243; GFX10-NEXT: v_movrels_b32_e32 v0, v0 244; GFX10-NEXT: ; return to shader part epilog 245entry: 246 %ext = extractelement <8 x float> %vec, i32 %sel 247 ret float %ext 248} 249 250define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) { 251; GPRIDX-LABEL: dyn_extract_v8f32_s_s: 252; GPRIDX: ; %bb.0: ; %entry 253; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 254; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 255; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 256; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 257; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 258; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 259; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 260; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 261; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 262; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 263; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 264; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 265; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 266; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0 267; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 268; GPRIDX-NEXT: ; return to shader part epilog 269; 270; MOVREL-LABEL: dyn_extract_v8f32_s_s: 271; MOVREL: ; %bb.0: ; %entry 272; MOVREL-NEXT: s_mov_b32 s0, s2 273; MOVREL-NEXT: s_mov_b32 m0, s10 274; MOVREL-NEXT: s_mov_b32 s1, s3 275; MOVREL-NEXT: s_mov_b32 s2, s4 276; MOVREL-NEXT: s_mov_b32 s3, s5 277; MOVREL-NEXT: s_mov_b32 s4, s6 278; MOVREL-NEXT: s_mov_b32 s5, s7 279; MOVREL-NEXT: s_mov_b32 s6, s8 280; MOVREL-NEXT: s_mov_b32 s7, s9 281; MOVREL-NEXT: s_movrels_b32 s0, s0 282; MOVREL-NEXT: v_mov_b32_e32 v0, s0 283; MOVREL-NEXT: ; return to shader part epilog 284; 285; GFX10-LABEL: dyn_extract_v8f32_s_s: 286; GFX10: ; %bb.0: ; %entry 287; 
GFX10-NEXT: s_mov_b32 s0, s2 288; GFX10-NEXT: s_mov_b32 m0, s10 289; GFX10-NEXT: s_mov_b32 s1, s3 290; GFX10-NEXT: s_mov_b32 s2, s4 291; GFX10-NEXT: s_mov_b32 s3, s5 292; GFX10-NEXT: s_mov_b32 s4, s6 293; GFX10-NEXT: s_mov_b32 s5, s7 294; GFX10-NEXT: s_mov_b32 s6, s8 295; GFX10-NEXT: s_mov_b32 s7, s9 296; GFX10-NEXT: s_movrels_b32 s0, s0 297; GFX10-NEXT: v_mov_b32_e32 v0, s0 298; GFX10-NEXT: ; return to shader part epilog 299entry: 300 %ext = extractelement <8 x float> %vec, i32 %sel 301 ret float %ext 302} 303 304define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { 305; GCN-LABEL: dyn_extract_v8i64_const_s_v: 306; GCN: ; %bb.0: ; %entry 307; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 308; GCN-NEXT: s_mov_b64 s[4:5], 1 309; GCN-NEXT: s_mov_b64 s[6:7], 2 310; GCN-NEXT: v_mov_b32_e32 v1, s4 311; GCN-NEXT: v_mov_b32_e32 v2, s5 312; GCN-NEXT: v_mov_b32_e32 v3, s6 313; GCN-NEXT: v_mov_b32_e32 v4, s7 314; GCN-NEXT: s_mov_b64 s[8:9], 3 315; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 316; GCN-NEXT: v_mov_b32_e32 v5, s8 317; GCN-NEXT: v_mov_b32_e32 v6, s9 318; GCN-NEXT: s_mov_b64 s[10:11], 4 319; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 320; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 321; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 322; GCN-NEXT: v_mov_b32_e32 v7, s10 323; GCN-NEXT: v_mov_b32_e32 v8, s11 324; GCN-NEXT: s_mov_b64 s[12:13], 5 325; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 326; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 327; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 328; GCN-NEXT: s_mov_b64 s[14:15], 6 329; GCN-NEXT: v_mov_b32_e32 v9, s12 330; GCN-NEXT: v_mov_b32_e32 v10, s13 331; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 332; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 333; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 334; GCN-NEXT: s_mov_b64 s[16:17], 7 335; GCN-NEXT: v_mov_b32_e32 v11, s14 336; GCN-NEXT: v_mov_b32_e32 v12, s15 337; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 338; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 339; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, 
v0 340; GCN-NEXT: s_mov_b64 s[18:19], 8 341; GCN-NEXT: v_mov_b32_e32 v13, s16 342; GCN-NEXT: v_mov_b32_e32 v14, s17 343; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 344; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc 345; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 346; GCN-NEXT: v_mov_b32_e32 v15, s18 347; GCN-NEXT: v_mov_b32_e32 v16, s19 348; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 349; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc 350; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 351; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc 352; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc 353; GCN-NEXT: s_setpc_b64 s[30:31] 354; 355; GFX10-LABEL: dyn_extract_v8i64_const_s_v: 356; GFX10: ; %bb.0: ; %entry 357; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 358; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 359; GFX10-NEXT: s_mov_b64 s[6:7], 2 360; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 361; GFX10-NEXT: v_mov_b32_e32 v1, s6 362; GFX10-NEXT: v_mov_b32_e32 v2, s7 363; GFX10-NEXT: s_mov_b64 s[4:5], 1 364; GFX10-NEXT: s_mov_b64 s[8:9], 3 365; GFX10-NEXT: s_mov_b64 s[10:11], 4 366; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo 367; GFX10-NEXT: v_cndmask_b32_e32 v2, s5, v2, vcc_lo 368; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 369; GFX10-NEXT: s_mov_b64 s[12:13], 5 370; GFX10-NEXT: s_mov_b64 s[14:15], 6 371; GFX10-NEXT: s_mov_b64 s[16:17], 7 372; GFX10-NEXT: s_mov_b64 s[18:19], 8 373; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 374; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 375; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 376; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 377; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo 378; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 379; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo 380; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo 381; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 382; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo 383; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo 384; GFX10-NEXT: v_cmp_eq_u32_e32 
vcc_lo, 6, v0 385; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s16, vcc_lo 386; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s17, vcc_lo 387; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 388; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s18, vcc_lo 389; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s19, vcc_lo 390; GFX10-NEXT: s_setpc_b64 s[30:31] 391entry: 392 %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel 393 ret i64 %ext 394} 395 396define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) { 397; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s: 398; GPRIDX: ; %bb.0: ; %entry 399; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 400; GPRIDX-NEXT: s_mov_b32 m0, s2 401; GPRIDX-NEXT: s_mov_b64 s[18:19], 8 402; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 403; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 404; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 405; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 406; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 407; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 408; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5] 409; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 410; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 411; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 412; GPRIDX-NEXT: s_endpgm 413; 414; MOVREL-LABEL: dyn_extract_v8i64_const_s_s: 415; MOVREL: ; %bb.0: ; %entry 416; MOVREL-NEXT: s_mov_b64 s[4:5], 1 417; MOVREL-NEXT: s_mov_b32 m0, s2 418; MOVREL-NEXT: s_mov_b64 s[18:19], 8 419; MOVREL-NEXT: s_mov_b64 s[16:17], 7 420; MOVREL-NEXT: s_mov_b64 s[14:15], 6 421; MOVREL-NEXT: s_mov_b64 s[12:13], 5 422; MOVREL-NEXT: s_mov_b64 s[10:11], 4 423; MOVREL-NEXT: s_mov_b64 s[8:9], 3 424; MOVREL-NEXT: s_mov_b64 s[6:7], 2 425; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5] 426; MOVREL-NEXT: v_mov_b32_e32 v0, s0 427; MOVREL-NEXT: v_mov_b32_e32 v1, s1 428; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 429; MOVREL-NEXT: s_endpgm 430; 431; GFX10-LABEL: dyn_extract_v8i64_const_s_s: 432; GFX10: ; %bb.0: ; %entry 433; GFX10-NEXT: s_mov_b64 s[4:5], 1 434; GFX10-NEXT: s_mov_b32 m0, s2 435; GFX10-NEXT: s_mov_b64 s[18:19], 8 
436; GFX10-NEXT: s_mov_b64 s[16:17], 7 437; GFX10-NEXT: s_mov_b64 s[14:15], 6 438; GFX10-NEXT: s_mov_b64 s[12:13], 5 439; GFX10-NEXT: s_mov_b64 s[10:11], 4 440; GFX10-NEXT: s_mov_b64 s[8:9], 3 441; GFX10-NEXT: s_mov_b64 s[6:7], 2 442; GFX10-NEXT: s_movrels_b64 s[0:1], s[4:5] 443; GFX10-NEXT: v_mov_b32_e32 v0, s0 444; GFX10-NEXT: v_mov_b32_e32 v1, s1 445; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 446; GFX10-NEXT: s_endpgm 447entry: 448 %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel 449 store i64 %ext, i64 addrspace(1)* undef 450 ret void 451} 452 453define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) { 454; GPRIDX-LABEL: dyn_extract_v8i64_s_v: 455; GPRIDX: ; %bb.0: ; %entry 456; GPRIDX-NEXT: s_mov_b32 s0, s2 457; GPRIDX-NEXT: s_mov_b32 s1, s3 458; GPRIDX-NEXT: s_mov_b32 s2, s4 459; GPRIDX-NEXT: s_mov_b32 s3, s5 460; GPRIDX-NEXT: s_mov_b32 s4, s6 461; GPRIDX-NEXT: s_mov_b32 s5, s7 462; GPRIDX-NEXT: v_mov_b32_e32 v1, s0 463; GPRIDX-NEXT: v_mov_b32_e32 v2, s1 464; GPRIDX-NEXT: v_mov_b32_e32 v3, s2 465; GPRIDX-NEXT: v_mov_b32_e32 v4, s3 466; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 467; GPRIDX-NEXT: s_mov_b32 s6, s8 468; GPRIDX-NEXT: s_mov_b32 s7, s9 469; GPRIDX-NEXT: v_mov_b32_e32 v5, s4 470; GPRIDX-NEXT: v_mov_b32_e32 v6, s5 471; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 472; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 473; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 474; GPRIDX-NEXT: s_mov_b32 s8, s10 475; GPRIDX-NEXT: s_mov_b32 s9, s11 476; GPRIDX-NEXT: v_mov_b32_e32 v7, s6 477; GPRIDX-NEXT: v_mov_b32_e32 v8, s7 478; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 479; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 480; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 481; GPRIDX-NEXT: s_mov_b32 s10, s12 482; GPRIDX-NEXT: s_mov_b32 s11, s13 483; GPRIDX-NEXT: v_mov_b32_e32 v9, s8 484; GPRIDX-NEXT: v_mov_b32_e32 v10, s9 485; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 486; 
GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 487; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 488; GPRIDX-NEXT: v_mov_b32_e32 v11, s10 489; GPRIDX-NEXT: v_mov_b32_e32 v12, s11 490; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 491; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 492; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 493; GPRIDX-NEXT: v_mov_b32_e32 v13, s14 494; GPRIDX-NEXT: v_mov_b32_e32 v14, s15 495; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 496; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc 497; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 498; GPRIDX-NEXT: v_mov_b32_e32 v15, s16 499; GPRIDX-NEXT: v_mov_b32_e32 v16, s17 500; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 501; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc 502; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 503; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc 504; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc 505; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 506; GPRIDX-NEXT: s_endpgm 507; 508; MOVREL-LABEL: dyn_extract_v8i64_s_v: 509; MOVREL: ; %bb.0: ; %entry 510; MOVREL-NEXT: s_mov_b32 s0, s2 511; MOVREL-NEXT: s_mov_b32 s1, s3 512; MOVREL-NEXT: s_mov_b32 s2, s4 513; MOVREL-NEXT: s_mov_b32 s3, s5 514; MOVREL-NEXT: s_mov_b32 s4, s6 515; MOVREL-NEXT: s_mov_b32 s5, s7 516; MOVREL-NEXT: v_mov_b32_e32 v1, s0 517; MOVREL-NEXT: v_mov_b32_e32 v2, s1 518; MOVREL-NEXT: v_mov_b32_e32 v3, s2 519; MOVREL-NEXT: v_mov_b32_e32 v4, s3 520; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 521; MOVREL-NEXT: s_mov_b32 s6, s8 522; MOVREL-NEXT: s_mov_b32 s7, s9 523; MOVREL-NEXT: v_mov_b32_e32 v5, s4 524; MOVREL-NEXT: v_mov_b32_e32 v6, s5 525; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 526; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 527; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 528; MOVREL-NEXT: s_mov_b32 s8, s10 529; MOVREL-NEXT: s_mov_b32 s9, s11 530; MOVREL-NEXT: v_mov_b32_e32 v7, s6 531; MOVREL-NEXT: v_mov_b32_e32 v8, s7 532; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 533; MOVREL-NEXT: 
v_cndmask_b32_e32 v2, v2, v6, vcc 534; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 535; MOVREL-NEXT: s_mov_b32 s10, s12 536; MOVREL-NEXT: s_mov_b32 s11, s13 537; MOVREL-NEXT: v_mov_b32_e32 v9, s8 538; MOVREL-NEXT: v_mov_b32_e32 v10, s9 539; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 540; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 541; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 542; MOVREL-NEXT: v_mov_b32_e32 v11, s10 543; MOVREL-NEXT: v_mov_b32_e32 v12, s11 544; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 545; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 546; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 547; MOVREL-NEXT: v_mov_b32_e32 v13, s14 548; MOVREL-NEXT: v_mov_b32_e32 v14, s15 549; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 550; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc 551; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 552; MOVREL-NEXT: v_mov_b32_e32 v15, s16 553; MOVREL-NEXT: v_mov_b32_e32 v16, s17 554; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 555; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc 556; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 557; MOVREL-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc 558; MOVREL-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc 559; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 560; MOVREL-NEXT: s_endpgm 561; 562; GFX10-LABEL: dyn_extract_v8i64_s_v: 563; GFX10: ; %bb.0: ; %entry 564; GFX10-NEXT: s_mov_b32 s0, s2 565; GFX10-NEXT: s_mov_b32 s2, s4 566; GFX10-NEXT: s_mov_b32 s19, s5 567; GFX10-NEXT: v_mov_b32_e32 v1, s2 568; GFX10-NEXT: v_mov_b32_e32 v2, s19 569; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 570; GFX10-NEXT: s_mov_b32 s1, s3 571; GFX10-NEXT: s_mov_b32 s4, s6 572; GFX10-NEXT: s_mov_b32 s5, s7 573; GFX10-NEXT: s_mov_b32 s6, s8 574; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 575; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo 576; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 577; GFX10-NEXT: s_mov_b32 s7, s9 578; GFX10-NEXT: s_mov_b32 s8, s10 579; GFX10-NEXT: s_mov_b32 s9, s11 580; GFX10-NEXT: s_mov_b32 
s10, s12 581; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 582; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo 583; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 584; GFX10-NEXT: s_mov_b32 s11, s13 585; GFX10-NEXT: s_mov_b32 s12, s14 586; GFX10-NEXT: s_mov_b32 s13, s15 587; GFX10-NEXT: s_mov_b32 s14, s16 588; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 589; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 590; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 591; GFX10-NEXT: s_mov_b32 s15, s17 592; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 593; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 594; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 595; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 596; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo 597; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 598; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo 599; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo 600; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 601; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo 602; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo 603; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 604; GFX10-NEXT: s_endpgm 605entry: 606 %ext = extractelement <8 x i64> %vec, i32 %sel 607 store i64 %ext, i64 addrspace(1)* undef 608 ret void 609} 610 611define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) { 612; GCN-LABEL: dyn_extract_v8i64_v_v: 613; GCN: ; %bb.0: ; %entry 614; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 615; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 616; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 617; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 618; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 619; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 620; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 621; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 622; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 623; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 624; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 625; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 
626; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 627; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 628; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 629; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 630; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 631; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 632; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 633; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 634; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 635; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 636; GCN-NEXT: s_setpc_b64 s[30:31] 637; 638; GFX10-LABEL: dyn_extract_v8i64_v_v: 639; GFX10: ; %bb.0: ; %entry 640; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 641; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 642; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 643; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 644; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 645; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 646; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 647; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 648; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 649; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 650; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 651; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 652; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 653; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 654; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 655; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 656; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 657; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 658; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 659; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 660; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 661; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 662; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo 663; GFX10-NEXT: s_setpc_b64 s[30:31] 664entry: 665 %ext = extractelement <8 x i64> %vec, i32 %sel 666 ret i64 %ext 667} 668 669define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg 
%sel) { 670; GPRIDX-LABEL: dyn_extract_v8i64_v_s: 671; GPRIDX: ; %bb.0: ; %entry 672; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 673; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) 674; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 675; GPRIDX-NEXT: v_mov_b32_e32 v17, v1 676; GPRIDX-NEXT: s_set_gpr_idx_off 677; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off 678; GPRIDX-NEXT: s_endpgm 679; 680; MOVREL-LABEL: dyn_extract_v8i64_v_s: 681; MOVREL: ; %bb.0: ; %entry 682; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 683; MOVREL-NEXT: v_movrels_b32_e32 v16, v0 684; MOVREL-NEXT: v_movrels_b32_e32 v17, v1 685; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17] 686; MOVREL-NEXT: s_endpgm 687; 688; GFX10-LABEL: dyn_extract_v8i64_v_s: 689; GFX10: ; %bb.0: ; %entry 690; GFX10-NEXT: s_lshl_b32 m0, s2, 1 691; GFX10-NEXT: v_movrels_b32_e32 v16, v0 692; GFX10-NEXT: v_movrels_b32_e32 v17, v1 693; GFX10-NEXT: global_store_dwordx2 v[0:1], v[16:17], off 694; GFX10-NEXT: s_endpgm 695entry: 696 %ext = extractelement <8 x i64> %vec, i32 %sel 697 store i64 %ext, i64 addrspace(1)* undef 698 ret void 699} 700 701define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) { 702; GPRIDX-LABEL: dyn_extract_v8i64_s_s: 703; GPRIDX: ; %bb.0: ; %entry 704; GPRIDX-NEXT: s_mov_b32 s0, s2 705; GPRIDX-NEXT: s_mov_b32 s1, s3 706; GPRIDX-NEXT: s_mov_b32 m0, s18 707; GPRIDX-NEXT: s_mov_b32 s2, s4 708; GPRIDX-NEXT: s_mov_b32 s3, s5 709; GPRIDX-NEXT: s_mov_b32 s4, s6 710; GPRIDX-NEXT: s_mov_b32 s5, s7 711; GPRIDX-NEXT: s_mov_b32 s6, s8 712; GPRIDX-NEXT: s_mov_b32 s7, s9 713; GPRIDX-NEXT: s_mov_b32 s8, s10 714; GPRIDX-NEXT: s_mov_b32 s9, s11 715; GPRIDX-NEXT: s_mov_b32 s10, s12 716; GPRIDX-NEXT: s_mov_b32 s11, s13 717; GPRIDX-NEXT: s_mov_b32 s12, s14 718; GPRIDX-NEXT: s_mov_b32 s13, s15 719; GPRIDX-NEXT: s_mov_b32 s14, s16 720; GPRIDX-NEXT: s_mov_b32 s15, s17 721; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] 722; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 723; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 724; GPRIDX-NEXT: 
global_store_dwordx2 v[0:1], v[0:1], off 725; GPRIDX-NEXT: s_endpgm 726; 727; MOVREL-LABEL: dyn_extract_v8i64_s_s: 728; MOVREL: ; %bb.0: ; %entry 729; MOVREL-NEXT: s_mov_b32 s0, s2 730; MOVREL-NEXT: s_mov_b32 s1, s3 731; MOVREL-NEXT: s_mov_b32 m0, s18 732; MOVREL-NEXT: s_mov_b32 s2, s4 733; MOVREL-NEXT: s_mov_b32 s3, s5 734; MOVREL-NEXT: s_mov_b32 s4, s6 735; MOVREL-NEXT: s_mov_b32 s5, s7 736; MOVREL-NEXT: s_mov_b32 s6, s8 737; MOVREL-NEXT: s_mov_b32 s7, s9 738; MOVREL-NEXT: s_mov_b32 s8, s10 739; MOVREL-NEXT: s_mov_b32 s9, s11 740; MOVREL-NEXT: s_mov_b32 s10, s12 741; MOVREL-NEXT: s_mov_b32 s11, s13 742; MOVREL-NEXT: s_mov_b32 s12, s14 743; MOVREL-NEXT: s_mov_b32 s13, s15 744; MOVREL-NEXT: s_mov_b32 s14, s16 745; MOVREL-NEXT: s_mov_b32 s15, s17 746; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] 747; MOVREL-NEXT: v_mov_b32_e32 v0, s0 748; MOVREL-NEXT: v_mov_b32_e32 v1, s1 749; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 750; MOVREL-NEXT: s_endpgm 751; 752; GFX10-LABEL: dyn_extract_v8i64_s_s: 753; GFX10: ; %bb.0: ; %entry 754; GFX10-NEXT: s_mov_b32 s0, s2 755; GFX10-NEXT: s_mov_b32 s1, s3 756; GFX10-NEXT: s_mov_b32 m0, s18 757; GFX10-NEXT: s_mov_b32 s2, s4 758; GFX10-NEXT: s_mov_b32 s3, s5 759; GFX10-NEXT: s_mov_b32 s4, s6 760; GFX10-NEXT: s_mov_b32 s5, s7 761; GFX10-NEXT: s_mov_b32 s6, s8 762; GFX10-NEXT: s_mov_b32 s7, s9 763; GFX10-NEXT: s_mov_b32 s8, s10 764; GFX10-NEXT: s_mov_b32 s9, s11 765; GFX10-NEXT: s_mov_b32 s10, s12 766; GFX10-NEXT: s_mov_b32 s11, s13 767; GFX10-NEXT: s_mov_b32 s12, s14 768; GFX10-NEXT: s_mov_b32 s13, s15 769; GFX10-NEXT: s_mov_b32 s14, s16 770; GFX10-NEXT: s_mov_b32 s15, s17 771; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] 772; GFX10-NEXT: v_mov_b32_e32 v0, s0 773; GFX10-NEXT: v_mov_b32_e32 v1, s1 774; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 775; GFX10-NEXT: s_endpgm 776entry: 777 %ext = extractelement <8 x i64> %vec, i32 %sel 778 store i64 %ext, i64 addrspace(1)* undef 779 ret void 780} 781 782define amdgpu_ps float 
@dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) { 783; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3: 784; GPRIDX: ; %bb.0: ; %entry 785; GPRIDX-NEXT: s_add_i32 s10, s10, 3 786; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 787; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 788; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 789; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 790; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 791; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 792; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 793; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 794; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 795; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 796; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 797; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 798; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 799; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0 800; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 801; GPRIDX-NEXT: ; return to shader part epilog 802; 803; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3: 804; MOVREL: ; %bb.0: ; %entry 805; MOVREL-NEXT: s_mov_b32 s0, s2 806; MOVREL-NEXT: s_mov_b32 s1, s3 807; MOVREL-NEXT: s_mov_b32 s3, s5 808; MOVREL-NEXT: s_mov_b32 m0, s10 809; MOVREL-NEXT: s_mov_b32 s2, s4 810; MOVREL-NEXT: s_mov_b32 s4, s6 811; MOVREL-NEXT: s_mov_b32 s5, s7 812; MOVREL-NEXT: s_mov_b32 s6, s8 813; MOVREL-NEXT: s_mov_b32 s7, s9 814; MOVREL-NEXT: s_movrels_b32 s0, s3 815; MOVREL-NEXT: v_mov_b32_e32 v0, s0 816; MOVREL-NEXT: ; return to shader part epilog 817; 818; GFX10-LABEL: dyn_extract_v8f32_s_s_offset3: 819; GFX10: ; %bb.0: ; %entry 820; GFX10-NEXT: s_mov_b32 s1, s3 821; GFX10-NEXT: s_mov_b32 s3, s5 822; GFX10-NEXT: s_mov_b32 m0, s10 823; GFX10-NEXT: s_mov_b32 s0, s2 824; GFX10-NEXT: s_mov_b32 s2, s4 825; GFX10-NEXT: s_mov_b32 s4, s6 826; GFX10-NEXT: s_mov_b32 s5, s7 827; GFX10-NEXT: s_mov_b32 s6, s8 828; GFX10-NEXT: s_mov_b32 s7, s9 829; GFX10-NEXT: s_movrels_b32 s0, s3 830; GFX10-NEXT: v_mov_b32_e32 v0, s0 831; GFX10-NEXT: ; return to shader part epilog 832entry: 833 %add = add i32 %sel, 3 834 %ext = extractelement <8 x float> %vec, i32 %add 835 ret float 
%ext 836} 837 838define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) { 839; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3: 840; GPRIDX: ; %bb.0: ; %entry 841; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 842; GPRIDX-NEXT: v_add_u32_e32 v8, 3, v8 843; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 844; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 845; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 846; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 847; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 848; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 849; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 850; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 851; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 852; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 853; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 854; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 855; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 856; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 857; GPRIDX-NEXT: s_setpc_b64 s[30:31] 858; 859; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3: 860; MOVREL: ; %bb.0: ; %entry 861; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 862; MOVREL-NEXT: v_add_u32_e32 v8, vcc, 3, v8 863; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 864; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 865; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 866; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 867; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 868; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 869; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 870; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 871; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 872; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 873; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 874; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 875; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 876; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 877; MOVREL-NEXT: s_setpc_b64 s[30:31] 878; 879; GFX10-LABEL: dyn_extract_v8f32_v_v_offset3: 880; GFX10: ; %bb.0: ; %entry 
881; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 882; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 883; GFX10-NEXT: v_add_nc_u32_e32 v8, 3, v8 884; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 885; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 886; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 887; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 888; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 889; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 890; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 891; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 892; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 893; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 894; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 895; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 896; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8 897; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 898; GFX10-NEXT: s_setpc_b64 s[30:31] 899entry: 900 %add = add i32 %sel, 3 901 %ext = extractelement <8 x float> %vec, i32 %add 902 ret float %ext 903} 904 905define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) { 906; GCN-LABEL: dyn_extract_v8f64_s_s_offset1: 907; GCN: ; %bb.0: ; %entry 908; GCN-NEXT: s_mov_b32 s0, s2 909; GCN-NEXT: s_mov_b32 s1, s3 910; GCN-NEXT: s_mov_b32 s2, s4 911; GCN-NEXT: s_mov_b32 s3, s5 912; GCN-NEXT: s_mov_b32 m0, s18 913; GCN-NEXT: s_mov_b32 s4, s6 914; GCN-NEXT: s_mov_b32 s5, s7 915; GCN-NEXT: s_mov_b32 s6, s8 916; GCN-NEXT: s_mov_b32 s7, s9 917; GCN-NEXT: s_mov_b32 s8, s10 918; GCN-NEXT: s_mov_b32 s9, s11 919; GCN-NEXT: s_mov_b32 s10, s12 920; GCN-NEXT: s_mov_b32 s11, s13 921; GCN-NEXT: s_mov_b32 s12, s14 922; GCN-NEXT: s_mov_b32 s13, s15 923; GCN-NEXT: s_mov_b32 s14, s16 924; GCN-NEXT: s_mov_b32 s15, s17 925; GCN-NEXT: s_movrels_b64 s[0:1], s[2:3] 926; GCN-NEXT: ; return to shader part epilog 927; 928; GFX10-LABEL: dyn_extract_v8f64_s_s_offset1: 929; GFX10: ; %bb.0: ; %entry 930; GFX10-NEXT: s_mov_b32 s0, s2 931; GFX10-NEXT: s_mov_b32 s1, s3 932; 
GFX10-NEXT: s_mov_b32 s2, s4 933; GFX10-NEXT: s_mov_b32 s3, s5 934; GFX10-NEXT: s_mov_b32 m0, s18 935; GFX10-NEXT: s_mov_b32 s4, s6 936; GFX10-NEXT: s_mov_b32 s5, s7 937; GFX10-NEXT: s_mov_b32 s6, s8 938; GFX10-NEXT: s_mov_b32 s7, s9 939; GFX10-NEXT: s_mov_b32 s8, s10 940; GFX10-NEXT: s_mov_b32 s9, s11 941; GFX10-NEXT: s_mov_b32 s10, s12 942; GFX10-NEXT: s_mov_b32 s11, s13 943; GFX10-NEXT: s_mov_b32 s12, s14 944; GFX10-NEXT: s_mov_b32 s13, s15 945; GFX10-NEXT: s_mov_b32 s14, s16 946; GFX10-NEXT: s_mov_b32 s15, s17 947; GFX10-NEXT: s_movrels_b64 s[0:1], s[2:3] 948; GFX10-NEXT: ; return to shader part epilog 949entry: 950 %add = add i32 %sel, 1 951 %ext = extractelement <8 x double> %vec, i32 %add 952 ret double %ext 953} 954 955define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) { 956; GCN-LABEL: dyn_extract_v8f64_s_s_offset2: 957; GCN: ; %bb.0: ; %entry 958; GCN-NEXT: s_mov_b32 s0, s2 959; GCN-NEXT: s_mov_b32 s1, s3 960; GCN-NEXT: s_mov_b32 s2, s4 961; GCN-NEXT: s_mov_b32 s3, s5 962; GCN-NEXT: s_mov_b32 s4, s6 963; GCN-NEXT: s_mov_b32 s5, s7 964; GCN-NEXT: s_mov_b32 m0, s18 965; GCN-NEXT: s_mov_b32 s6, s8 966; GCN-NEXT: s_mov_b32 s7, s9 967; GCN-NEXT: s_mov_b32 s8, s10 968; GCN-NEXT: s_mov_b32 s9, s11 969; GCN-NEXT: s_mov_b32 s10, s12 970; GCN-NEXT: s_mov_b32 s11, s13 971; GCN-NEXT: s_mov_b32 s12, s14 972; GCN-NEXT: s_mov_b32 s13, s15 973; GCN-NEXT: s_mov_b32 s14, s16 974; GCN-NEXT: s_mov_b32 s15, s17 975; GCN-NEXT: s_movrels_b64 s[0:1], s[4:5] 976; GCN-NEXT: ; return to shader part epilog 977; 978; GFX10-LABEL: dyn_extract_v8f64_s_s_offset2: 979; GFX10: ; %bb.0: ; %entry 980; GFX10-NEXT: s_mov_b32 s0, s2 981; GFX10-NEXT: s_mov_b32 s1, s3 982; GFX10-NEXT: s_mov_b32 s2, s4 983; GFX10-NEXT: s_mov_b32 s3, s5 984; GFX10-NEXT: s_mov_b32 s4, s6 985; GFX10-NEXT: s_mov_b32 s5, s7 986; GFX10-NEXT: s_mov_b32 m0, s18 987; GFX10-NEXT: s_mov_b32 s6, s8 988; GFX10-NEXT: s_mov_b32 s7, s9 989; GFX10-NEXT: s_mov_b32 s8, s10 990; 
GFX10-NEXT: s_mov_b32 s9, s11 991; GFX10-NEXT: s_mov_b32 s10, s12 992; GFX10-NEXT: s_mov_b32 s11, s13 993; GFX10-NEXT: s_mov_b32 s12, s14 994; GFX10-NEXT: s_mov_b32 s13, s15 995; GFX10-NEXT: s_mov_b32 s14, s16 996; GFX10-NEXT: s_mov_b32 s15, s17 997; GFX10-NEXT: s_movrels_b64 s[0:1], s[4:5] 998; GFX10-NEXT: ; return to shader part epilog 999entry: 1000 %add = add i32 %sel, 2 1001 %ext = extractelement <8 x double> %vec, i32 %add 1002 ret double %ext 1003} 1004 1005define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) { 1006; GCN-LABEL: dyn_extract_v8f64_s_s_offset3: 1007; GCN: ; %bb.0: ; %entry 1008; GCN-NEXT: s_mov_b32 s0, s2 1009; GCN-NEXT: s_mov_b32 s1, s3 1010; GCN-NEXT: s_mov_b32 s2, s4 1011; GCN-NEXT: s_mov_b32 s3, s5 1012; GCN-NEXT: s_mov_b32 s4, s6 1013; GCN-NEXT: s_mov_b32 s5, s7 1014; GCN-NEXT: s_mov_b32 s6, s8 1015; GCN-NEXT: s_mov_b32 s7, s9 1016; GCN-NEXT: s_mov_b32 m0, s18 1017; GCN-NEXT: s_mov_b32 s8, s10 1018; GCN-NEXT: s_mov_b32 s9, s11 1019; GCN-NEXT: s_mov_b32 s10, s12 1020; GCN-NEXT: s_mov_b32 s11, s13 1021; GCN-NEXT: s_mov_b32 s12, s14 1022; GCN-NEXT: s_mov_b32 s13, s15 1023; GCN-NEXT: s_mov_b32 s14, s16 1024; GCN-NEXT: s_mov_b32 s15, s17 1025; GCN-NEXT: s_movrels_b64 s[0:1], s[6:7] 1026; GCN-NEXT: ; return to shader part epilog 1027; 1028; GFX10-LABEL: dyn_extract_v8f64_s_s_offset3: 1029; GFX10: ; %bb.0: ; %entry 1030; GFX10-NEXT: s_mov_b32 s0, s2 1031; GFX10-NEXT: s_mov_b32 s1, s3 1032; GFX10-NEXT: s_mov_b32 s2, s4 1033; GFX10-NEXT: s_mov_b32 s3, s5 1034; GFX10-NEXT: s_mov_b32 s4, s6 1035; GFX10-NEXT: s_mov_b32 s5, s7 1036; GFX10-NEXT: s_mov_b32 s6, s8 1037; GFX10-NEXT: s_mov_b32 s7, s9 1038; GFX10-NEXT: s_mov_b32 m0, s18 1039; GFX10-NEXT: s_mov_b32 s8, s10 1040; GFX10-NEXT: s_mov_b32 s9, s11 1041; GFX10-NEXT: s_mov_b32 s10, s12 1042; GFX10-NEXT: s_mov_b32 s11, s13 1043; GFX10-NEXT: s_mov_b32 s12, s14 1044; GFX10-NEXT: s_mov_b32 s13, s15 1045; GFX10-NEXT: s_mov_b32 s14, s16 1046; GFX10-NEXT: s_mov_b32 
s15, s17 1047; GFX10-NEXT: s_movrels_b64 s[0:1], s[6:7] 1048; GFX10-NEXT: ; return to shader part epilog 1049entry: 1050 %add = add i32 %sel, 3 1051 %ext = extractelement <8 x double> %vec, i32 %add 1052 ret double %ext 1053} 1054 1055define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { 1056; GCN-LABEL: dyn_extract_v8f64_s_s_offset4: 1057; GCN: ; %bb.0: ; %entry 1058; GCN-NEXT: s_mov_b32 s0, s2 1059; GCN-NEXT: s_mov_b32 s1, s3 1060; GCN-NEXT: s_mov_b32 s2, s4 1061; GCN-NEXT: s_mov_b32 s3, s5 1062; GCN-NEXT: s_mov_b32 s4, s6 1063; GCN-NEXT: s_mov_b32 s5, s7 1064; GCN-NEXT: s_mov_b32 s6, s8 1065; GCN-NEXT: s_mov_b32 s7, s9 1066; GCN-NEXT: s_mov_b32 s8, s10 1067; GCN-NEXT: s_mov_b32 s9, s11 1068; GCN-NEXT: s_mov_b32 m0, s18 1069; GCN-NEXT: s_mov_b32 s10, s12 1070; GCN-NEXT: s_mov_b32 s11, s13 1071; GCN-NEXT: s_mov_b32 s12, s14 1072; GCN-NEXT: s_mov_b32 s13, s15 1073; GCN-NEXT: s_mov_b32 s14, s16 1074; GCN-NEXT: s_mov_b32 s15, s17 1075; GCN-NEXT: s_movrels_b64 s[0:1], s[8:9] 1076; GCN-NEXT: ; return to shader part epilog 1077; 1078; GFX10-LABEL: dyn_extract_v8f64_s_s_offset4: 1079; GFX10: ; %bb.0: ; %entry 1080; GFX10-NEXT: s_mov_b32 s0, s2 1081; GFX10-NEXT: s_mov_b32 s1, s3 1082; GFX10-NEXT: s_mov_b32 s2, s4 1083; GFX10-NEXT: s_mov_b32 s3, s5 1084; GFX10-NEXT: s_mov_b32 s4, s6 1085; GFX10-NEXT: s_mov_b32 s5, s7 1086; GFX10-NEXT: s_mov_b32 s6, s8 1087; GFX10-NEXT: s_mov_b32 s7, s9 1088; GFX10-NEXT: s_mov_b32 s8, s10 1089; GFX10-NEXT: s_mov_b32 s9, s11 1090; GFX10-NEXT: s_mov_b32 m0, s18 1091; GFX10-NEXT: s_mov_b32 s10, s12 1092; GFX10-NEXT: s_mov_b32 s11, s13 1093; GFX10-NEXT: s_mov_b32 s12, s14 1094; GFX10-NEXT: s_mov_b32 s13, s15 1095; GFX10-NEXT: s_mov_b32 s14, s16 1096; GFX10-NEXT: s_mov_b32 s15, s17 1097; GFX10-NEXT: s_movrels_b64 s[0:1], s[8:9] 1098; GFX10-NEXT: ; return to shader part epilog 1099entry: 1100 %add = add i32 %sel, 4 1101 %ext = extractelement <8 x double> %vec, i32 %add 1102 ret double %ext 1103} 1104 
; Constant-offset variants of the uniform (inreg vector, inreg index) double
; extract: dyn_extract_v8f64_s_s_offset5, _offset6, _offset7 and _offsetm1.
; Each function adds a constant (5, 6, 7 or -1) to %sel and uses the sum as the
; extractelement index into an <8 x double>.
;
; What the expected output below shows:
;  * offset5/6/7 - m0 is copied from s18 with no add, and the constant shows up
;    as the base register pair of s_movrels_b64 (s[10:11], s[12:13], s[14:15]),
;    i.e. two 32-bit registers per element of offset.
;  * offsetm1    - the base pair stays s[0:1] and the add is emitted explicitly
;    as "s_add_i32 m0, s18, -1".
;  * offset7     - uses separate per-target check prefixes because only the
;    gfx900 expectation (GPRIDX prefix) has an "s_nop 0" between the m0 write
;    and s_movrels_b64. Presumably a hazard workaround on that target - TODO
;    confirm against the backend's hazard recognizer.
;
; The check lines are generated by utils/update_llc_test_checks.py (see the
; header on the first line of this file); regenerate them instead of editing
; them by hand.
1105define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) { 1106; GCN-LABEL: dyn_extract_v8f64_s_s_offset5: 1107; GCN: ; %bb.0: ; %entry 1108; GCN-NEXT: s_mov_b32 s0, s2 1109; GCN-NEXT: s_mov_b32 s1, s3 1110; GCN-NEXT: s_mov_b32 s2, s4 1111; GCN-NEXT: s_mov_b32 s3, s5 1112; GCN-NEXT: s_mov_b32 s4, s6 1113; GCN-NEXT: s_mov_b32 s5, s7 1114; GCN-NEXT: s_mov_b32 s6, s8 1115; GCN-NEXT: s_mov_b32 s7, s9 1116; GCN-NEXT: s_mov_b32 s8, s10 1117; GCN-NEXT: s_mov_b32 s9, s11 1118; GCN-NEXT: s_mov_b32 s10, s12 1119; GCN-NEXT: s_mov_b32 s11, s13 1120; GCN-NEXT: s_mov_b32 m0, s18 1121; GCN-NEXT: s_mov_b32 s12, s14 1122; GCN-NEXT: s_mov_b32 s13, s15 1123; GCN-NEXT: s_mov_b32 s14, s16 1124; GCN-NEXT: s_mov_b32 s15, s17 1125; GCN-NEXT: s_movrels_b64 s[0:1], s[10:11] 1126; GCN-NEXT: ; return to shader part epilog 1127; 1128; GFX10-LABEL: dyn_extract_v8f64_s_s_offset5: 1129; GFX10: ; %bb.0: ; %entry 1130; GFX10-NEXT: s_mov_b32 s0, s2 1131; GFX10-NEXT: s_mov_b32 s1, s3 1132; GFX10-NEXT: s_mov_b32 s2, s4 1133; GFX10-NEXT: s_mov_b32 s3, s5 1134; GFX10-NEXT: s_mov_b32 s4, s6 1135; GFX10-NEXT: s_mov_b32 s5, s7 1136; GFX10-NEXT: s_mov_b32 s6, s8 1137; GFX10-NEXT: s_mov_b32 s7, s9 1138; GFX10-NEXT: s_mov_b32 s8, s10 1139; GFX10-NEXT: s_mov_b32 s9, s11 1140; GFX10-NEXT: s_mov_b32 s10, s12 1141; GFX10-NEXT: s_mov_b32 s11, s13 1142; GFX10-NEXT: s_mov_b32 m0, s18 1143; GFX10-NEXT: s_mov_b32 s12, s14 1144; GFX10-NEXT: s_mov_b32 s13, s15 1145; GFX10-NEXT: s_mov_b32 s14, s16 1146; GFX10-NEXT: s_mov_b32 s15, s17 1147; GFX10-NEXT: s_movrels_b64 s[0:1], s[10:11] 1148; GFX10-NEXT: ; return to shader part epilog 1149entry: 1150 %add = add i32 %sel, 5 1151 %ext = extractelement <8 x double> %vec, i32 %add 1152 ret double %ext 1153} 1154 1155define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { 1156; GCN-LABEL: dyn_extract_v8f64_s_s_offset6: 1157; GCN: ; %bb.0: ; %entry 1158; GCN-NEXT: s_mov_b32 s0, s2 1159; GCN-NEXT: 
s_mov_b32 s1, s3 1160; GCN-NEXT: s_mov_b32 s2, s4 1161; GCN-NEXT: s_mov_b32 s3, s5 1162; GCN-NEXT: s_mov_b32 s4, s6 1163; GCN-NEXT: s_mov_b32 s5, s7 1164; GCN-NEXT: s_mov_b32 s6, s8 1165; GCN-NEXT: s_mov_b32 s7, s9 1166; GCN-NEXT: s_mov_b32 s8, s10 1167; GCN-NEXT: s_mov_b32 s9, s11 1168; GCN-NEXT: s_mov_b32 s10, s12 1169; GCN-NEXT: s_mov_b32 s11, s13 1170; GCN-NEXT: s_mov_b32 s12, s14 1171; GCN-NEXT: s_mov_b32 s13, s15 1172; GCN-NEXT: s_mov_b32 m0, s18 1173; GCN-NEXT: s_mov_b32 s14, s16 1174; GCN-NEXT: s_mov_b32 s15, s17 1175; GCN-NEXT: s_movrels_b64 s[0:1], s[12:13] 1176; GCN-NEXT: ; return to shader part epilog 1177; 1178; GFX10-LABEL: dyn_extract_v8f64_s_s_offset6: 1179; GFX10: ; %bb.0: ; %entry 1180; GFX10-NEXT: s_mov_b32 s0, s2 1181; GFX10-NEXT: s_mov_b32 s1, s3 1182; GFX10-NEXT: s_mov_b32 s2, s4 1183; GFX10-NEXT: s_mov_b32 s3, s5 1184; GFX10-NEXT: s_mov_b32 s4, s6 1185; GFX10-NEXT: s_mov_b32 s5, s7 1186; GFX10-NEXT: s_mov_b32 s6, s8 1187; GFX10-NEXT: s_mov_b32 s7, s9 1188; GFX10-NEXT: s_mov_b32 s8, s10 1189; GFX10-NEXT: s_mov_b32 s9, s11 1190; GFX10-NEXT: s_mov_b32 s10, s12 1191; GFX10-NEXT: s_mov_b32 s11, s13 1192; GFX10-NEXT: s_mov_b32 s12, s14 1193; GFX10-NEXT: s_mov_b32 s13, s15 1194; GFX10-NEXT: s_mov_b32 m0, s18 1195; GFX10-NEXT: s_mov_b32 s14, s16 1196; GFX10-NEXT: s_mov_b32 s15, s17 1197; GFX10-NEXT: s_movrels_b64 s[0:1], s[12:13] 1198; GFX10-NEXT: ; return to shader part epilog 1199entry: 1200 %add = add i32 %sel, 6 1201 %ext = extractelement <8 x double> %vec, i32 %add 1202 ret double %ext 1203} 1204 1205define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { 1206; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: 1207; GPRIDX: ; %bb.0: ; %entry 1208; GPRIDX-NEXT: s_mov_b32 s0, s2 1209; GPRIDX-NEXT: s_mov_b32 s1, s3 1210; GPRIDX-NEXT: s_mov_b32 s2, s4 1211; GPRIDX-NEXT: s_mov_b32 s3, s5 1212; GPRIDX-NEXT: s_mov_b32 s4, s6 1213; GPRIDX-NEXT: s_mov_b32 s5, s7 1214; GPRIDX-NEXT: s_mov_b32 s6, s8 1215; GPRIDX-NEXT: 
s_mov_b32 s7, s9 1216; GPRIDX-NEXT: s_mov_b32 s8, s10 1217; GPRIDX-NEXT: s_mov_b32 s9, s11 1218; GPRIDX-NEXT: s_mov_b32 s10, s12 1219; GPRIDX-NEXT: s_mov_b32 s11, s13 1220; GPRIDX-NEXT: s_mov_b32 s12, s14 1221; GPRIDX-NEXT: s_mov_b32 s13, s15 1222; GPRIDX-NEXT: s_mov_b32 s14, s16 1223; GPRIDX-NEXT: s_mov_b32 s15, s17 1224; GPRIDX-NEXT: s_mov_b32 m0, s18 1225; GPRIDX-NEXT: s_nop 0 1226; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15] 1227; GPRIDX-NEXT: ; return to shader part epilog 1228; 1229; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: 1230; MOVREL: ; %bb.0: ; %entry 1231; MOVREL-NEXT: s_mov_b32 s0, s2 1232; MOVREL-NEXT: s_mov_b32 s1, s3 1233; MOVREL-NEXT: s_mov_b32 s2, s4 1234; MOVREL-NEXT: s_mov_b32 s3, s5 1235; MOVREL-NEXT: s_mov_b32 s4, s6 1236; MOVREL-NEXT: s_mov_b32 s5, s7 1237; MOVREL-NEXT: s_mov_b32 s6, s8 1238; MOVREL-NEXT: s_mov_b32 s7, s9 1239; MOVREL-NEXT: s_mov_b32 s8, s10 1240; MOVREL-NEXT: s_mov_b32 s9, s11 1241; MOVREL-NEXT: s_mov_b32 s10, s12 1242; MOVREL-NEXT: s_mov_b32 s11, s13 1243; MOVREL-NEXT: s_mov_b32 s12, s14 1244; MOVREL-NEXT: s_mov_b32 s13, s15 1245; MOVREL-NEXT: s_mov_b32 s14, s16 1246; MOVREL-NEXT: s_mov_b32 s15, s17 1247; MOVREL-NEXT: s_mov_b32 m0, s18 1248; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15] 1249; MOVREL-NEXT: ; return to shader part epilog 1250; 1251; GFX10-LABEL: dyn_extract_v8f64_s_s_offset7: 1252; GFX10: ; %bb.0: ; %entry 1253; GFX10-NEXT: s_mov_b32 s0, s2 1254; GFX10-NEXT: s_mov_b32 s1, s3 1255; GFX10-NEXT: s_mov_b32 s2, s4 1256; GFX10-NEXT: s_mov_b32 s3, s5 1257; GFX10-NEXT: s_mov_b32 s4, s6 1258; GFX10-NEXT: s_mov_b32 s5, s7 1259; GFX10-NEXT: s_mov_b32 s6, s8 1260; GFX10-NEXT: s_mov_b32 s7, s9 1261; GFX10-NEXT: s_mov_b32 s8, s10 1262; GFX10-NEXT: s_mov_b32 s9, s11 1263; GFX10-NEXT: s_mov_b32 s10, s12 1264; GFX10-NEXT: s_mov_b32 s11, s13 1265; GFX10-NEXT: s_mov_b32 s12, s14 1266; GFX10-NEXT: s_mov_b32 s13, s15 1267; GFX10-NEXT: s_mov_b32 s14, s16 1268; GFX10-NEXT: s_mov_b32 s15, s17 1269; GFX10-NEXT: s_mov_b32 m0, s18 
1270; GFX10-NEXT: s_movrels_b64 s[0:1], s[14:15] 1271; GFX10-NEXT: ; return to shader part epilog 1272entry: 1273 %add = add i32 %sel, 7 1274 %ext = extractelement <8 x double> %vec, i32 %add 1275 ret double %ext 1276} 1277 1278define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) { 1279; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1: 1280; GCN: ; %bb.0: ; %entry 1281; GCN-NEXT: s_mov_b32 s0, s2 1282; GCN-NEXT: s_mov_b32 s1, s3 1283; GCN-NEXT: s_add_i32 m0, s18, -1 1284; GCN-NEXT: s_mov_b32 s2, s4 1285; GCN-NEXT: s_mov_b32 s3, s5 1286; GCN-NEXT: s_mov_b32 s4, s6 1287; GCN-NEXT: s_mov_b32 s5, s7 1288; GCN-NEXT: s_mov_b32 s6, s8 1289; GCN-NEXT: s_mov_b32 s7, s9 1290; GCN-NEXT: s_mov_b32 s8, s10 1291; GCN-NEXT: s_mov_b32 s9, s11 1292; GCN-NEXT: s_mov_b32 s10, s12 1293; GCN-NEXT: s_mov_b32 s11, s13 1294; GCN-NEXT: s_mov_b32 s12, s14 1295; GCN-NEXT: s_mov_b32 s13, s15 1296; GCN-NEXT: s_mov_b32 s14, s16 1297; GCN-NEXT: s_mov_b32 s15, s17 1298; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] 1299; GCN-NEXT: ; return to shader part epilog 1300; 1301; GFX10-LABEL: dyn_extract_v8f64_s_s_offsetm1: 1302; GFX10: ; %bb.0: ; %entry 1303; GFX10-NEXT: s_mov_b32 s0, s2 1304; GFX10-NEXT: s_mov_b32 s1, s3 1305; GFX10-NEXT: s_add_i32 m0, s18, -1 1306; GFX10-NEXT: s_mov_b32 s2, s4 1307; GFX10-NEXT: s_mov_b32 s3, s5 1308; GFX10-NEXT: s_mov_b32 s4, s6 1309; GFX10-NEXT: s_mov_b32 s5, s7 1310; GFX10-NEXT: s_mov_b32 s6, s8 1311; GFX10-NEXT: s_mov_b32 s7, s9 1312; GFX10-NEXT: s_mov_b32 s8, s10 1313; GFX10-NEXT: s_mov_b32 s9, s11 1314; GFX10-NEXT: s_mov_b32 s10, s12 1315; GFX10-NEXT: s_mov_b32 s11, s13 1316; GFX10-NEXT: s_mov_b32 s12, s14 1317; GFX10-NEXT: s_mov_b32 s13, s15 1318; GFX10-NEXT: s_mov_b32 s14, s16 1319; GFX10-NEXT: s_mov_b32 s15, s17 1320; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] 1321; GFX10-NEXT: ; return to shader part epilog 1322entry: 1323 %add = add i32 %sel, -1 1324 %ext = extractelement <8 x double> %vec, i32 %add 1325 ret double %ext 1326} 
1327 1328define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) { 1329; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3: 1330; GPRIDX: ; %bb.0: ; %entry 1331; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1332; GPRIDX-NEXT: v_add_u32_e32 v16, 3, v16 1333; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1334; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1335; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1336; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 1337; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1338; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1339; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1340; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1341; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1342; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 1343; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 1344; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 1345; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 1346; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 1347; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 1348; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1349; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 1350; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 1351; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1352; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 1353; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 1354; GPRIDX-NEXT: s_setpc_b64 s[30:31] 1355; 1356; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3: 1357; MOVREL: ; %bb.0: ; %entry 1358; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1359; MOVREL-NEXT: v_add_u32_e32 v16, vcc, 3, v16 1360; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1361; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1362; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1363; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 1364; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1365; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1366; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1367; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, 
vcc 1368; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1369; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 1370; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 1371; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 1372; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 1373; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 1374; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 1375; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1376; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 1377; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 1378; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1379; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 1380; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 1381; MOVREL-NEXT: s_setpc_b64 s[30:31] 1382; 1383; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3: 1384; GFX10: ; %bb.0: ; %entry 1385; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1386; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1387; GFX10-NEXT: v_add_nc_u32_e32 v16, 3, v16 1388; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 1389; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1390; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 1391; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1392; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 1393; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 1394; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 1395; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 1396; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 1397; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1398; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 1399; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 1400; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 1401; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 1402; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 1403; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1404; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 1405; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 1406; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 1407; GFX10-NEXT: 
v_cndmask_b32_e32 v0, v0, v14, vcc_lo 1408; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo 1409; GFX10-NEXT: s_setpc_b64 s[30:31] 1410entry: 1411 %add = add i32 %sel, 3 1412 %ext = extractelement <8 x double> %vec, i32 %add 1413 ret double %ext 1414} 1415 1416define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) { 1417; GCN-LABEL: dyn_extract_v8p3_v_v: 1418; GCN: ; %bb.0: ; %entry 1419; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1420; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 1421; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1422; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 1423; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1424; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 1425; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1426; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 1427; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1428; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 1429; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 1430; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 1431; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1432; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 1433; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 1434; GCN-NEXT: s_setpc_b64 s[30:31] 1435; 1436; GFX10-LABEL: dyn_extract_v8p3_v_v: 1437; GFX10: ; %bb.0: ; %entry 1438; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1439; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1440; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 1441; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1442; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 1443; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1444; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 1445; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 1446; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 1447; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 1448; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 1449; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 1450; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 1451; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 1452; GFX10-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 7, v8 1453; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 1454; GFX10-NEXT: s_setpc_b64 s[30:31] 1455entry: 1456 %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx 1457 ret i8 addrspace(3)* %ext 1458} 1459 1460define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) { 1461; GPRIDX-LABEL: dyn_extract_v8p3_s_s: 1462; GPRIDX: ; %bb.0: ; %entry 1463; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 1464; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 1465; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 1466; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 1467; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 1468; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 1469; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 1470; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 1471; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 1472; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 1473; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 1474; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 1475; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 1476; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0 1477; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 1478; GPRIDX-NEXT: ds_write_b32 v0, v0 1479; GPRIDX-NEXT: s_endpgm 1480; 1481; MOVREL-LABEL: dyn_extract_v8p3_s_s: 1482; MOVREL: ; %bb.0: ; %entry 1483; MOVREL-NEXT: s_mov_b32 s0, s2 1484; MOVREL-NEXT: s_mov_b32 m0, s10 1485; MOVREL-NEXT: s_mov_b32 s1, s3 1486; MOVREL-NEXT: s_mov_b32 s2, s4 1487; MOVREL-NEXT: s_mov_b32 s3, s5 1488; MOVREL-NEXT: s_mov_b32 s4, s6 1489; MOVREL-NEXT: s_mov_b32 s5, s7 1490; MOVREL-NEXT: s_mov_b32 s6, s8 1491; MOVREL-NEXT: s_mov_b32 s7, s9 1492; MOVREL-NEXT: s_movrels_b32 s0, s0 1493; MOVREL-NEXT: v_mov_b32_e32 v0, s0 1494; MOVREL-NEXT: s_mov_b32 m0, -1 1495; MOVREL-NEXT: ds_write_b32 v0, v0 1496; MOVREL-NEXT: s_endpgm 1497; 1498; GFX10-LABEL: dyn_extract_v8p3_s_s: 1499; GFX10: ; %bb.0: ; %entry 1500; GFX10-NEXT: s_mov_b32 s0, s2 1501; GFX10-NEXT: s_mov_b32 m0, s10 1502; GFX10-NEXT: s_mov_b32 s1, s3 1503; GFX10-NEXT: s_mov_b32 s2, s4 1504; GFX10-NEXT: s_mov_b32 s3, s5 1505; GFX10-NEXT: s_mov_b32 s4, s6 1506; 
GFX10-NEXT: s_mov_b32 s5, s7 1507; GFX10-NEXT: s_mov_b32 s6, s8 1508; GFX10-NEXT: s_mov_b32 s7, s9 1509; GFX10-NEXT: s_movrels_b32 s0, s0 1510; GFX10-NEXT: v_mov_b32_e32 v0, s0 1511; GFX10-NEXT: ds_write_b32 v0, v0 1512; GFX10-NEXT: s_endpgm 1513entry: 1514 %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx 1515 store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef 1516 ret void 1517} 1518 1519define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) { 1520; GCN-LABEL: dyn_extract_v8p1_v_v: 1521; GCN: ; %bb.0: ; %entry 1522; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1523; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1524; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1525; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1526; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 1527; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1528; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1529; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1530; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1531; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1532; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 1533; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 1534; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 1535; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 1536; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 1537; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 1538; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1539; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 1540; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 1541; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1542; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 1543; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 1544; GCN-NEXT: s_setpc_b64 s[30:31] 1545; 1546; GFX10-LABEL: dyn_extract_v8p1_v_v: 1547; GFX10: ; %bb.0: ; %entry 1548; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1549; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1550; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 1551; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1552; GFX10-NEXT: 
v_cndmask_b32_e32 v1, v1, v3, vcc_lo 1553; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1554; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 1555; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 1556; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 1557; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 1558; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 1559; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1560; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 1561; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 1562; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 1563; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 1564; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 1565; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1566; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 1567; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 1568; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 1569; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 1570; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo 1571; GFX10-NEXT: s_setpc_b64 s[30:31] 1572entry: 1573 %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx 1574 ret i8 addrspace(1)* %ext 1575} 1576 1577define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) { 1578; GPRIDX-LABEL: dyn_extract_v8p1_s_s: 1579; GPRIDX: ; %bb.0: ; %entry 1580; GPRIDX-NEXT: s_mov_b32 s0, s2 1581; GPRIDX-NEXT: s_mov_b32 s1, s3 1582; GPRIDX-NEXT: s_mov_b32 m0, s18 1583; GPRIDX-NEXT: s_mov_b32 s2, s4 1584; GPRIDX-NEXT: s_mov_b32 s3, s5 1585; GPRIDX-NEXT: s_mov_b32 s4, s6 1586; GPRIDX-NEXT: s_mov_b32 s5, s7 1587; GPRIDX-NEXT: s_mov_b32 s6, s8 1588; GPRIDX-NEXT: s_mov_b32 s7, s9 1589; GPRIDX-NEXT: s_mov_b32 s8, s10 1590; GPRIDX-NEXT: s_mov_b32 s9, s11 1591; GPRIDX-NEXT: s_mov_b32 s10, s12 1592; GPRIDX-NEXT: s_mov_b32 s11, s13 1593; GPRIDX-NEXT: s_mov_b32 s12, s14 1594; GPRIDX-NEXT: s_mov_b32 s13, s15 1595; GPRIDX-NEXT: s_mov_b32 s14, s16 1596; GPRIDX-NEXT: s_mov_b32 s15, s17 1597; GPRIDX-NEXT: s_movrels_b64 
s[0:1], s[0:1] 1598; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 1599; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 1600; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1601; GPRIDX-NEXT: s_endpgm 1602; 1603; MOVREL-LABEL: dyn_extract_v8p1_s_s: 1604; MOVREL: ; %bb.0: ; %entry 1605; MOVREL-NEXT: s_mov_b32 s0, s2 1606; MOVREL-NEXT: s_mov_b32 s1, s3 1607; MOVREL-NEXT: s_mov_b32 m0, s18 1608; MOVREL-NEXT: s_mov_b32 s2, s4 1609; MOVREL-NEXT: s_mov_b32 s3, s5 1610; MOVREL-NEXT: s_mov_b32 s4, s6 1611; MOVREL-NEXT: s_mov_b32 s5, s7 1612; MOVREL-NEXT: s_mov_b32 s6, s8 1613; MOVREL-NEXT: s_mov_b32 s7, s9 1614; MOVREL-NEXT: s_mov_b32 s8, s10 1615; MOVREL-NEXT: s_mov_b32 s9, s11 1616; MOVREL-NEXT: s_mov_b32 s10, s12 1617; MOVREL-NEXT: s_mov_b32 s11, s13 1618; MOVREL-NEXT: s_mov_b32 s12, s14 1619; MOVREL-NEXT: s_mov_b32 s13, s15 1620; MOVREL-NEXT: s_mov_b32 s14, s16 1621; MOVREL-NEXT: s_mov_b32 s15, s17 1622; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] 1623; MOVREL-NEXT: v_mov_b32_e32 v0, s0 1624; MOVREL-NEXT: v_mov_b32_e32 v1, s1 1625; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1626; MOVREL-NEXT: s_endpgm 1627; 1628; GFX10-LABEL: dyn_extract_v8p1_s_s: 1629; GFX10: ; %bb.0: ; %entry 1630; GFX10-NEXT: s_mov_b32 s0, s2 1631; GFX10-NEXT: s_mov_b32 s1, s3 1632; GFX10-NEXT: s_mov_b32 m0, s18 1633; GFX10-NEXT: s_mov_b32 s2, s4 1634; GFX10-NEXT: s_mov_b32 s3, s5 1635; GFX10-NEXT: s_mov_b32 s4, s6 1636; GFX10-NEXT: s_mov_b32 s5, s7 1637; GFX10-NEXT: s_mov_b32 s6, s8 1638; GFX10-NEXT: s_mov_b32 s7, s9 1639; GFX10-NEXT: s_mov_b32 s8, s10 1640; GFX10-NEXT: s_mov_b32 s9, s11 1641; GFX10-NEXT: s_mov_b32 s10, s12 1642; GFX10-NEXT: s_mov_b32 s11, s13 1643; GFX10-NEXT: s_mov_b32 s12, s14 1644; GFX10-NEXT: s_mov_b32 s13, s15 1645; GFX10-NEXT: s_mov_b32 s14, s16 1646; GFX10-NEXT: s_mov_b32 s15, s17 1647; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] 1648; GFX10-NEXT: v_mov_b32_e32 v0, s0 1649; GFX10-NEXT: v_mov_b32_e32 v1, s1 1650; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1651; GFX10-NEXT: s_endpgm 
1652entry: 1653 %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx 1654 store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef 1655 ret void 1656} 1657 1658define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) { 1659; GPRIDX-LABEL: dyn_extract_v16f32_v_s: 1660; GPRIDX: ; %bb.0: ; %entry 1661; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) 1662; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 1663; GPRIDX-NEXT: s_set_gpr_idx_off 1664; GPRIDX-NEXT: ; return to shader part epilog 1665; 1666; MOVREL-LABEL: dyn_extract_v16f32_v_s: 1667; MOVREL: ; %bb.0: ; %entry 1668; MOVREL-NEXT: s_mov_b32 m0, s2 1669; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 1670; MOVREL-NEXT: ; return to shader part epilog 1671; 1672; GFX10-LABEL: dyn_extract_v16f32_v_s: 1673; GFX10: ; %bb.0: ; %entry 1674; GFX10-NEXT: s_mov_b32 m0, s2 1675; GFX10-NEXT: v_movrels_b32_e32 v0, v0 1676; GFX10-NEXT: ; return to shader part epilog 1677entry: 1678 %ext = extractelement <16 x float> %vec, i32 %sel 1679 ret float %ext 1680} 1681 1682define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) { 1683; GPRIDX-LABEL: dyn_extract_v32f32_v_s: 1684; GPRIDX: ; %bb.0: ; %entry 1685; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) 1686; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 1687; GPRIDX-NEXT: s_set_gpr_idx_off 1688; GPRIDX-NEXT: ; return to shader part epilog 1689; 1690; MOVREL-LABEL: dyn_extract_v32f32_v_s: 1691; MOVREL: ; %bb.0: ; %entry 1692; MOVREL-NEXT: s_mov_b32 m0, s2 1693; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 1694; MOVREL-NEXT: ; return to shader part epilog 1695; 1696; GFX10-LABEL: dyn_extract_v32f32_v_s: 1697; GFX10: ; %bb.0: ; %entry 1698; GFX10-NEXT: s_mov_b32 m0, s2 1699; GFX10-NEXT: v_movrels_b32_e32 v0, v0 1700; GFX10-NEXT: ; return to shader part epilog 1701entry: 1702 %ext = extractelement <32 x float> %vec, i32 %sel 1703 ret float %ext 1704} 1705 1706define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) { 1707; 
GPRIDX-LABEL: dyn_extract_v16f64_v_s: 1708; GPRIDX: ; %bb.0: ; %entry 1709; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 1710; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) 1711; GPRIDX-NEXT: v_mov_b32_e32 v32, v0 1712; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 1713; GPRIDX-NEXT: s_set_gpr_idx_off 1714; GPRIDX-NEXT: v_readfirstlane_b32 s0, v32 1715; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 1716; GPRIDX-NEXT: ; return to shader part epilog 1717; 1718; MOVREL-LABEL: dyn_extract_v16f64_v_s: 1719; MOVREL: ; %bb.0: ; %entry 1720; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 1721; MOVREL-NEXT: v_movrels_b32_e32 v32, v0 1722; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 1723; MOVREL-NEXT: v_readfirstlane_b32 s0, v32 1724; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 1725; MOVREL-NEXT: ; return to shader part epilog 1726; 1727; GFX10-LABEL: dyn_extract_v16f64_v_s: 1728; GFX10: ; %bb.0: ; %entry 1729; GFX10-NEXT: s_lshl_b32 m0, s2, 1 1730; GFX10-NEXT: v_movrels_b32_e32 v32, v0 1731; GFX10-NEXT: v_movrels_b32_e32 v0, v1 1732; GFX10-NEXT: v_readfirstlane_b32 s0, v32 1733; GFX10-NEXT: v_readfirstlane_b32 s1, v0 1734; GFX10-NEXT: ; return to shader part epilog 1735entry: 1736 %ext = extractelement <16 x double> %vec, i32 %sel 1737 ret double %ext 1738} 1739 1740define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) { 1741; GCN-LABEL: dyn_extract_v16f32_s_s: 1742; GCN: ; %bb.0: ; %entry 1743; GCN-NEXT: s_mov_b32 s4, 1.0 1744; GCN-NEXT: s_mov_b32 m0, s2 1745; GCN-NEXT: s_mov_b32 s19, 0x41800000 1746; GCN-NEXT: s_mov_b32 s18, 0x41700000 1747; GCN-NEXT: s_mov_b32 s17, 0x41600000 1748; GCN-NEXT: s_mov_b32 s16, 0x41500000 1749; GCN-NEXT: s_mov_b32 s15, 0x41400000 1750; GCN-NEXT: s_mov_b32 s14, 0x41300000 1751; GCN-NEXT: s_mov_b32 s13, 0x41200000 1752; GCN-NEXT: s_mov_b32 s12, 0x41100000 1753; GCN-NEXT: s_mov_b32 s11, 0x41000000 1754; GCN-NEXT: s_mov_b32 s10, 0x40e00000 1755; GCN-NEXT: s_mov_b32 s9, 0x40c00000 1756; GCN-NEXT: s_mov_b32 s8, 0x40a00000 1757; GCN-NEXT: s_mov_b32 s7, 4.0 1758; GCN-NEXT: s_mov_b32 s6, 
0x40400000 1759; GCN-NEXT: s_mov_b32 s5, 2.0 1760; GCN-NEXT: s_movrels_b32 s0, s4 1761; GCN-NEXT: v_mov_b32_e32 v0, s0 1762; GCN-NEXT: ; return to shader part epilog 1763; 1764; GFX10-LABEL: dyn_extract_v16f32_s_s: 1765; GFX10: ; %bb.0: ; %entry 1766; GFX10-NEXT: s_mov_b32 s4, 1.0 1767; GFX10-NEXT: s_mov_b32 m0, s2 1768; GFX10-NEXT: s_mov_b32 s19, 0x41800000 1769; GFX10-NEXT: s_mov_b32 s18, 0x41700000 1770; GFX10-NEXT: s_mov_b32 s17, 0x41600000 1771; GFX10-NEXT: s_mov_b32 s16, 0x41500000 1772; GFX10-NEXT: s_mov_b32 s15, 0x41400000 1773; GFX10-NEXT: s_mov_b32 s14, 0x41300000 1774; GFX10-NEXT: s_mov_b32 s13, 0x41200000 1775; GFX10-NEXT: s_mov_b32 s12, 0x41100000 1776; GFX10-NEXT: s_mov_b32 s11, 0x41000000 1777; GFX10-NEXT: s_mov_b32 s10, 0x40e00000 1778; GFX10-NEXT: s_mov_b32 s9, 0x40c00000 1779; GFX10-NEXT: s_mov_b32 s8, 0x40a00000 1780; GFX10-NEXT: s_mov_b32 s7, 4.0 1781; GFX10-NEXT: s_mov_b32 s6, 0x40400000 1782; GFX10-NEXT: s_mov_b32 s5, 2.0 1783; GFX10-NEXT: s_movrels_b32 s0, s4 1784; GFX10-NEXT: v_mov_b32_e32 v0, s0 1785; GFX10-NEXT: ; return to shader part epilog 1786entry: 1787 %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel 1788 ret float %ext 1789} 1790 1791define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) { 1792; GCN-LABEL: dyn_extract_v32f32_s_s: 1793; GCN: ; %bb.0: ; %entry 1794; GCN-NEXT: s_mov_b32 s36, 1.0 1795; GCN-NEXT: s_mov_b32 m0, s2 1796; GCN-NEXT: s_mov_b32 s67, 0x42000000 1797; GCN-NEXT: s_mov_b32 s66, 0x41f80000 1798; GCN-NEXT: s_mov_b32 s65, 0x41f00000 1799; GCN-NEXT: s_mov_b32 s64, 0x41e80000 1800; GCN-NEXT: s_mov_b32 s63, 0x41e00000 1801; GCN-NEXT: s_mov_b32 s62, 0x41d80000 1802; GCN-NEXT: s_mov_b32 s61, 0x41d00000 1803; GCN-NEXT: s_mov_b32 s60, 0x41c80000 1804; GCN-NEXT: s_mov_b32 s59, 0x41c00000 1805; GCN-NEXT: s_mov_b32 s58, 0x41b80000 1806; 
GCN-NEXT: s_mov_b32 s57, 0x41b00000 1807; GCN-NEXT: s_mov_b32 s56, 0x41a80000 1808; GCN-NEXT: s_mov_b32 s55, 0x41a00000 1809; GCN-NEXT: s_mov_b32 s54, 0x41980000 1810; GCN-NEXT: s_mov_b32 s53, 0x41900000 1811; GCN-NEXT: s_mov_b32 s52, 0x41880000 1812; GCN-NEXT: s_mov_b32 s51, 0x41800000 1813; GCN-NEXT: s_mov_b32 s50, 0x41700000 1814; GCN-NEXT: s_mov_b32 s49, 0x41600000 1815; GCN-NEXT: s_mov_b32 s48, 0x41500000 1816; GCN-NEXT: s_mov_b32 s47, 0x41400000 1817; GCN-NEXT: s_mov_b32 s46, 0x41300000 1818; GCN-NEXT: s_mov_b32 s45, 0x41200000 1819; GCN-NEXT: s_mov_b32 s44, 0x41100000 1820; GCN-NEXT: s_mov_b32 s43, 0x41000000 1821; GCN-NEXT: s_mov_b32 s42, 0x40e00000 1822; GCN-NEXT: s_mov_b32 s41, 0x40c00000 1823; GCN-NEXT: s_mov_b32 s40, 0x40a00000 1824; GCN-NEXT: s_mov_b32 s39, 4.0 1825; GCN-NEXT: s_mov_b32 s38, 0x40400000 1826; GCN-NEXT: s_mov_b32 s37, 2.0 1827; GCN-NEXT: s_movrels_b32 s0, s36 1828; GCN-NEXT: v_mov_b32_e32 v0, s0 1829; GCN-NEXT: ; return to shader part epilog 1830; 1831; GFX10-LABEL: dyn_extract_v32f32_s_s: 1832; GFX10: ; %bb.0: ; %entry 1833; GFX10-NEXT: s_mov_b32 s36, 1.0 1834; GFX10-NEXT: s_mov_b32 m0, s2 1835; GFX10-NEXT: s_mov_b32 s67, 0x42000000 1836; GFX10-NEXT: s_mov_b32 s66, 0x41f80000 1837; GFX10-NEXT: s_mov_b32 s65, 0x41f00000 1838; GFX10-NEXT: s_mov_b32 s64, 0x41e80000 1839; GFX10-NEXT: s_mov_b32 s63, 0x41e00000 1840; GFX10-NEXT: s_mov_b32 s62, 0x41d80000 1841; GFX10-NEXT: s_mov_b32 s61, 0x41d00000 1842; GFX10-NEXT: s_mov_b32 s60, 0x41c80000 1843; GFX10-NEXT: s_mov_b32 s59, 0x41c00000 1844; GFX10-NEXT: s_mov_b32 s58, 0x41b80000 1845; GFX10-NEXT: s_mov_b32 s57, 0x41b00000 1846; GFX10-NEXT: s_mov_b32 s56, 0x41a80000 1847; GFX10-NEXT: s_mov_b32 s55, 0x41a00000 1848; GFX10-NEXT: s_mov_b32 s54, 0x41980000 1849; GFX10-NEXT: s_mov_b32 s53, 0x41900000 1850; GFX10-NEXT: s_mov_b32 s52, 0x41880000 1851; GFX10-NEXT: s_mov_b32 s51, 0x41800000 1852; GFX10-NEXT: s_mov_b32 s50, 0x41700000 1853; GFX10-NEXT: s_mov_b32 s49, 0x41600000 1854; GFX10-NEXT: s_mov_b32 
s48, 0x41500000 1855; GFX10-NEXT: s_mov_b32 s47, 0x41400000 1856; GFX10-NEXT: s_mov_b32 s46, 0x41300000 1857; GFX10-NEXT: s_mov_b32 s45, 0x41200000 1858; GFX10-NEXT: s_mov_b32 s44, 0x41100000 1859; GFX10-NEXT: s_mov_b32 s43, 0x41000000 1860; GFX10-NEXT: s_mov_b32 s42, 0x40e00000 1861; GFX10-NEXT: s_mov_b32 s41, 0x40c00000 1862; GFX10-NEXT: s_mov_b32 s40, 0x40a00000 1863; GFX10-NEXT: s_mov_b32 s39, 4.0 1864; GFX10-NEXT: s_mov_b32 s38, 0x40400000 1865; GFX10-NEXT: s_mov_b32 s37, 2.0 1866; GFX10-NEXT: s_movrels_b32 s0, s36 1867; GFX10-NEXT: v_mov_b32_e32 v0, s0 1868; GFX10-NEXT: ; return to shader part epilog 1869entry: 1870 %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel 1871 ret float %ext 1872} 1873 1874define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) { 1875; GCN-LABEL: dyn_extract_v16f64_s_s: 1876; GCN: ; %bb.0: ; %entry 1877; GCN-NEXT: s_mov_b32 s66, 0 1878; GCN-NEXT: s_mov_b64 s[36:37], 1.0 1879; GCN-NEXT: s_mov_b32 m0, s2 1880; GCN-NEXT: s_mov_b32 s67, 0x40300000 1881; GCN-NEXT: s_mov_b32 s65, 0x402e0000 1882; GCN-NEXT: s_mov_b32 s64, s66 1883; GCN-NEXT: s_mov_b32 s63, 0x402c0000 1884; GCN-NEXT: s_mov_b32 s62, s66 1885; GCN-NEXT: s_mov_b32 s61, 0x402a0000 1886; GCN-NEXT: s_mov_b32 s60, s66 1887; GCN-NEXT: s_mov_b32 s59, 0x40280000 1888; GCN-NEXT: s_mov_b32 s58, s66 1889; GCN-NEXT: s_mov_b32 s57, 0x40260000 1890; GCN-NEXT: s_mov_b32 s56, s66 1891; GCN-NEXT: s_mov_b32 s55, 0x40240000 1892; GCN-NEXT: s_mov_b32 s54, s66 1893; GCN-NEXT: s_mov_b32 s53, 0x40220000 1894; GCN-NEXT: s_mov_b32 s52, s66 1895; GCN-NEXT: s_mov_b32 s51, 0x40200000 1896; GCN-NEXT: s_mov_b32 s50, s66 1897; 
GCN-NEXT: s_mov_b32 s49, 0x401c0000 1898; GCN-NEXT: s_mov_b32 s48, s66 1899; GCN-NEXT: s_mov_b32 s47, 0x40180000 1900; GCN-NEXT: s_mov_b32 s46, s66 1901; GCN-NEXT: s_mov_b32 s45, 0x40140000 1902; GCN-NEXT: s_mov_b32 s44, s66 1903; GCN-NEXT: s_mov_b64 s[42:43], 4.0 1904; GCN-NEXT: s_mov_b32 s41, 0x40080000 1905; GCN-NEXT: s_mov_b32 s40, s66 1906; GCN-NEXT: s_mov_b64 s[38:39], 2.0 1907; GCN-NEXT: s_movrels_b64 s[0:1], s[36:37] 1908; GCN-NEXT: ; return to shader part epilog 1909; 1910; GFX10-LABEL: dyn_extract_v16f64_s_s: 1911; GFX10: ; %bb.0: ; %entry 1912; GFX10-NEXT: s_mov_b32 s66, 0 1913; GFX10-NEXT: s_mov_b64 s[36:37], 1.0 1914; GFX10-NEXT: s_mov_b32 m0, s2 1915; GFX10-NEXT: s_mov_b32 s67, 0x40300000 1916; GFX10-NEXT: s_mov_b32 s65, 0x402e0000 1917; GFX10-NEXT: s_mov_b32 s64, s66 1918; GFX10-NEXT: s_mov_b32 s63, 0x402c0000 1919; GFX10-NEXT: s_mov_b32 s62, s66 1920; GFX10-NEXT: s_mov_b32 s61, 0x402a0000 1921; GFX10-NEXT: s_mov_b32 s60, s66 1922; GFX10-NEXT: s_mov_b32 s59, 0x40280000 1923; GFX10-NEXT: s_mov_b32 s58, s66 1924; GFX10-NEXT: s_mov_b32 s57, 0x40260000 1925; GFX10-NEXT: s_mov_b32 s56, s66 1926; GFX10-NEXT: s_mov_b32 s55, 0x40240000 1927; GFX10-NEXT: s_mov_b32 s54, s66 1928; GFX10-NEXT: s_mov_b32 s53, 0x40220000 1929; GFX10-NEXT: s_mov_b32 s52, s66 1930; GFX10-NEXT: s_mov_b32 s51, 0x40200000 1931; GFX10-NEXT: s_mov_b32 s50, s66 1932; GFX10-NEXT: s_mov_b32 s49, 0x401c0000 1933; GFX10-NEXT: s_mov_b32 s48, s66 1934; GFX10-NEXT: s_mov_b32 s47, 0x40180000 1935; GFX10-NEXT: s_mov_b32 s46, s66 1936; GFX10-NEXT: s_mov_b32 s45, 0x40140000 1937; GFX10-NEXT: s_mov_b32 s44, s66 1938; GFX10-NEXT: s_mov_b64 s[42:43], 4.0 1939; GFX10-NEXT: s_mov_b32 s41, 0x40080000 1940; GFX10-NEXT: s_mov_b32 s40, s66 1941; GFX10-NEXT: s_mov_b64 s[38:39], 2.0 1942; GFX10-NEXT: s_movrels_b64 s[0:1], s[36:37] 1943; GFX10-NEXT: ; return to shader part epilog 1944entry: 1945 %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 
7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel 1946 ret double %ext 1947} 1948 1949define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) { 1950; GCN-LABEL: dyn_extract_v6f32_s_v: 1951; GCN: ; %bb.0: ; %entry 1952; GCN-NEXT: s_mov_b32 s0, s2 1953; GCN-NEXT: s_mov_b32 s1, s3 1954; GCN-NEXT: v_mov_b32_e32 v1, s0 1955; GCN-NEXT: v_mov_b32_e32 v2, s1 1956; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 1957; GCN-NEXT: v_mov_b32_e32 v3, s4 1958; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1959; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 1960; GCN-NEXT: v_mov_b32_e32 v4, s5 1961; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1962; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 1963; GCN-NEXT: v_mov_b32_e32 v5, s6 1964; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1965; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 1966; GCN-NEXT: v_mov_b32_e32 v6, s7 1967; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1968; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 1969; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v6, vcc 1970; GCN-NEXT: ; return to shader part epilog 1971; 1972; GFX10-LABEL: dyn_extract_v6f32_s_v: 1973; GFX10: ; %bb.0: ; %entry 1974; GFX10-NEXT: s_mov_b32 s1, s3 1975; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 1976; GFX10-NEXT: v_mov_b32_e32 v1, s1 1977; GFX10-NEXT: s_mov_b32 s0, s2 1978; GFX10-NEXT: s_mov_b32 s2, s4 1979; GFX10-NEXT: s_mov_b32 s3, s5 1980; GFX10-NEXT: s_mov_b32 s4, s6 1981; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 1982; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 1983; GFX10-NEXT: s_mov_b32 s5, s7 1984; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo 1985; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 1986; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo 1987; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 1988; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 1989; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 1990; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s5, vcc_lo 1991; GFX10-NEXT: ; return to 
shader part epilog 1992entry: 1993 %ext = extractelement <6 x float> %vec, i32 %sel 1994 ret float %ext 1995} 1996 1997define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) { 1998; GCN-LABEL: dyn_extract_v6f32_v_v: 1999; GCN: ; %bb.0: ; %entry 2000; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2001; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 2002; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2003; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v6 2004; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2005; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6 2006; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2007; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v6 2008; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2009; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v6 2010; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 2011; GCN-NEXT: s_setpc_b64 s[30:31] 2012; 2013; GFX10-LABEL: dyn_extract_v6f32_v_v: 2014; GFX10: ; %bb.0: ; %entry 2015; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2016; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2017; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 2018; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 2019; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v6 2020; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2021; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v6 2022; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2023; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v6 2024; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2025; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v6 2026; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 2027; GFX10-NEXT: s_setpc_b64 s[30:31] 2028entry: 2029 %ext = extractelement <6 x float> %vec, i32 %sel 2030 ret float %ext 2031} 2032 2033define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) { 2034; GCN-LABEL: dyn_extract_v6f32_v_s: 2035; GCN: ; %bb.0: ; %entry 2036; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 2037; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2038; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 2039; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 
2040; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 2041; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2042; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 2043; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2044; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 2045; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 2046; GCN-NEXT: ; return to shader part epilog 2047; 2048; GFX10-LABEL: dyn_extract_v6f32_v_s: 2049; GFX10: ; %bb.0: ; %entry 2050; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 2051; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 2052; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 2053; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2054; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 2055; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2056; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 2057; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2058; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5 2059; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 2060; GFX10-NEXT: ; return to shader part epilog 2061entry: 2062 %ext = extractelement <6 x float> %vec, i32 %sel 2063 ret float %ext 2064} 2065 2066define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) { 2067; GCN-LABEL: dyn_extract_v6f32_s_s: 2068; GCN: ; %bb.0: ; %entry 2069; GCN-NEXT: s_cmp_eq_u32 s8, 1 2070; GCN-NEXT: s_cselect_b32 s0, s3, s2 2071; GCN-NEXT: s_cmp_eq_u32 s8, 2 2072; GCN-NEXT: s_cselect_b32 s0, s4, s0 2073; GCN-NEXT: s_cmp_eq_u32 s8, 3 2074; GCN-NEXT: s_cselect_b32 s0, s5, s0 2075; GCN-NEXT: s_cmp_eq_u32 s8, 4 2076; GCN-NEXT: s_cselect_b32 s0, s6, s0 2077; GCN-NEXT: s_cmp_eq_u32 s8, 5 2078; GCN-NEXT: s_cselect_b32 s0, s7, s0 2079; GCN-NEXT: v_mov_b32_e32 v0, s0 2080; GCN-NEXT: ; return to shader part epilog 2081; 2082; GFX10-LABEL: dyn_extract_v6f32_s_s: 2083; GFX10: ; %bb.0: ; %entry 2084; GFX10-NEXT: s_cmp_eq_u32 s8, 1 2085; GFX10-NEXT: s_cselect_b32 s0, s3, s2 2086; GFX10-NEXT: s_cmp_eq_u32 s8, 2 2087; GFX10-NEXT: s_cselect_b32 s0, s4, s0 2088; GFX10-NEXT: s_cmp_eq_u32 s8, 3 2089; GFX10-NEXT: 
s_cselect_b32 s0, s5, s0 2090; GFX10-NEXT: s_cmp_eq_u32 s8, 4 2091; GFX10-NEXT: s_cselect_b32 s0, s6, s0 2092; GFX10-NEXT: s_cmp_eq_u32 s8, 5 2093; GFX10-NEXT: s_cselect_b32 s0, s7, s0 2094; GFX10-NEXT: v_mov_b32_e32 v0, s0 2095; GFX10-NEXT: ; return to shader part epilog 2096entry: 2097 %ext = extractelement <6 x float> %vec, i32 %sel 2098 ret float %ext 2099} 2100 2101define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) { 2102; GCN-LABEL: dyn_extract_v7f32_s_v: 2103; GCN: ; %bb.0: ; %entry 2104; GCN-NEXT: s_mov_b32 s0, s2 2105; GCN-NEXT: s_mov_b32 s1, s3 2106; GCN-NEXT: s_mov_b32 s2, s4 2107; GCN-NEXT: v_mov_b32_e32 v1, s0 2108; GCN-NEXT: v_mov_b32_e32 v2, s1 2109; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2110; GCN-NEXT: v_mov_b32_e32 v3, s2 2111; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2112; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 2113; GCN-NEXT: v_mov_b32_e32 v4, s5 2114; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2115; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 2116; GCN-NEXT: v_mov_b32_e32 v5, s6 2117; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2118; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 2119; GCN-NEXT: v_mov_b32_e32 v6, s7 2120; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2121; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2122; GCN-NEXT: v_mov_b32_e32 v7, s8 2123; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 2124; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 2125; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc 2126; GCN-NEXT: ; return to shader part epilog 2127; 2128; GFX10-LABEL: dyn_extract_v7f32_s_v: 2129; GFX10: ; %bb.0: ; %entry 2130; GFX10-NEXT: s_mov_b32 s1, s3 2131; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2132; GFX10-NEXT: v_mov_b32_e32 v1, s1 2133; GFX10-NEXT: s_mov_b32 s0, s2 2134; GFX10-NEXT: s_mov_b32 s2, s4 2135; GFX10-NEXT: s_mov_b32 s3, s5 2136; GFX10-NEXT: s_mov_b32 s4, s6 2137; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 2138; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2139; GFX10-NEXT: s_mov_b32 s5, s7 2140; GFX10-NEXT: 
s_mov_b32 s6, s8 2141; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo 2142; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2143; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo 2144; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2145; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 2146; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2147; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo 2148; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 2149; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s6, vcc_lo 2150; GFX10-NEXT: ; return to shader part epilog 2151entry: 2152 %ext = extractelement <7 x float> %vec, i32 %sel 2153 ret float %ext 2154} 2155 2156define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) { 2157; GCN-LABEL: dyn_extract_v7f32_v_v: 2158; GCN: ; %bb.0: ; %entry 2159; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2160; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 2161; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2162; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v7 2163; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2164; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7 2165; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2166; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v7 2167; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2168; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v7 2169; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 2170; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v7 2171; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 2172; GCN-NEXT: s_setpc_b64 s[30:31] 2173; 2174; GFX10-LABEL: dyn_extract_v7f32_v_v: 2175; GFX10: ; %bb.0: ; %entry 2176; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2177; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2178; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7 2179; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 2180; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v7 2181; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2182; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v7 2183; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2184; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v7 2185; GFX10-NEXT: 
v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2186; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v7 2187; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 2188; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v7 2189; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 2190; GFX10-NEXT: s_setpc_b64 s[30:31] 2191entry: 2192 %ext = extractelement <7 x float> %vec, i32 %sel 2193 ret float %ext 2194} 2195 2196define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) { 2197; GCN-LABEL: dyn_extract_v7f32_v_s: 2198; GCN: ; %bb.0: ; %entry 2199; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 2200; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2201; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 2202; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2203; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 2204; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2205; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 2206; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2207; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 2208; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 2209; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 2210; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 2211; GCN-NEXT: ; return to shader part epilog 2212; 2213; GFX10-LABEL: dyn_extract_v7f32_v_s: 2214; GFX10: ; %bb.0: ; %entry 2215; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 2216; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 2217; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 2218; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2219; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 2220; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2221; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 2222; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2223; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5 2224; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 2225; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6 2226; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 2227; GFX10-NEXT: ; return to shader part epilog 2228entry: 2229 %ext = extractelement <7 x float> %vec, i32 %sel 2230 ret float %ext 2231} 2232 
2233define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) { 2234; GCN-LABEL: dyn_extract_v7f32_s_s: 2235; GCN: ; %bb.0: ; %entry 2236; GCN-NEXT: s_cmp_eq_u32 s9, 1 2237; GCN-NEXT: s_cselect_b32 s0, s3, s2 2238; GCN-NEXT: s_cmp_eq_u32 s9, 2 2239; GCN-NEXT: s_cselect_b32 s0, s4, s0 2240; GCN-NEXT: s_cmp_eq_u32 s9, 3 2241; GCN-NEXT: s_cselect_b32 s0, s5, s0 2242; GCN-NEXT: s_cmp_eq_u32 s9, 4 2243; GCN-NEXT: s_cselect_b32 s0, s6, s0 2244; GCN-NEXT: s_cmp_eq_u32 s9, 5 2245; GCN-NEXT: s_cselect_b32 s0, s7, s0 2246; GCN-NEXT: s_cmp_eq_u32 s9, 6 2247; GCN-NEXT: s_cselect_b32 s0, s8, s0 2248; GCN-NEXT: v_mov_b32_e32 v0, s0 2249; GCN-NEXT: ; return to shader part epilog 2250; 2251; GFX10-LABEL: dyn_extract_v7f32_s_s: 2252; GFX10: ; %bb.0: ; %entry 2253; GFX10-NEXT: s_cmp_eq_u32 s9, 1 2254; GFX10-NEXT: s_cselect_b32 s0, s3, s2 2255; GFX10-NEXT: s_cmp_eq_u32 s9, 2 2256; GFX10-NEXT: s_cselect_b32 s0, s4, s0 2257; GFX10-NEXT: s_cmp_eq_u32 s9, 3 2258; GFX10-NEXT: s_cselect_b32 s0, s5, s0 2259; GFX10-NEXT: s_cmp_eq_u32 s9, 4 2260; GFX10-NEXT: s_cselect_b32 s0, s6, s0 2261; GFX10-NEXT: s_cmp_eq_u32 s9, 5 2262; GFX10-NEXT: s_cselect_b32 s0, s7, s0 2263; GFX10-NEXT: s_cmp_eq_u32 s9, 6 2264; GFX10-NEXT: s_cselect_b32 s0, s8, s0 2265; GFX10-NEXT: v_mov_b32_e32 v0, s0 2266; GFX10-NEXT: ; return to shader part epilog 2267entry: 2268 %ext = extractelement <7 x float> %vec, i32 %sel 2269 ret float %ext 2270} 2271 2272define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) { 2273; GCN-LABEL: dyn_extract_v6f64_s_v: 2274; GCN: ; %bb.0: ; %entry 2275; GCN-NEXT: s_mov_b32 s0, s2 2276; GCN-NEXT: s_mov_b32 s1, s3 2277; GCN-NEXT: s_mov_b32 s2, s4 2278; GCN-NEXT: s_mov_b32 s3, s5 2279; GCN-NEXT: s_mov_b32 s4, s6 2280; GCN-NEXT: s_mov_b32 s5, s7 2281; GCN-NEXT: v_mov_b32_e32 v1, s0 2282; GCN-NEXT: v_mov_b32_e32 v2, s1 2283; GCN-NEXT: v_mov_b32_e32 v3, s2 2284; GCN-NEXT: v_mov_b32_e32 v4, s3 2285; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2286; 
GCN-NEXT: s_mov_b32 s6, s8 2287; GCN-NEXT: s_mov_b32 s7, s9 2288; GCN-NEXT: v_mov_b32_e32 v5, s4 2289; GCN-NEXT: v_mov_b32_e32 v6, s5 2290; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2291; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2292; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 2293; GCN-NEXT: v_mov_b32_e32 v7, s6 2294; GCN-NEXT: v_mov_b32_e32 v8, s7 2295; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2296; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 2297; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 2298; GCN-NEXT: v_mov_b32_e32 v9, s10 2299; GCN-NEXT: v_mov_b32_e32 v10, s11 2300; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2301; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 2302; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 2303; GCN-NEXT: v_mov_b32_e32 v11, s12 2304; GCN-NEXT: v_mov_b32_e32 v12, s13 2305; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2306; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 2307; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2308; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v11, vcc 2309; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v12, vcc 2310; GCN-NEXT: v_readfirstlane_b32 s0, v0 2311; GCN-NEXT: v_readfirstlane_b32 s1, v1 2312; GCN-NEXT: ; return to shader part epilog 2313; 2314; GFX10-LABEL: dyn_extract_v6f64_s_v: 2315; GFX10: ; %bb.0: ; %entry 2316; GFX10-NEXT: s_mov_b32 s0, s2 2317; GFX10-NEXT: s_mov_b32 s2, s4 2318; GFX10-NEXT: s_mov_b32 s15, s5 2319; GFX10-NEXT: v_mov_b32_e32 v1, s2 2320; GFX10-NEXT: v_mov_b32_e32 v2, s15 2321; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2322; GFX10-NEXT: s_mov_b32 s1, s3 2323; GFX10-NEXT: s_mov_b32 s4, s6 2324; GFX10-NEXT: s_mov_b32 s5, s7 2325; GFX10-NEXT: s_mov_b32 s6, s8 2326; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 2327; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo 2328; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2329; GFX10-NEXT: s_mov_b32 s7, s9 2330; GFX10-NEXT: s_mov_b32 s8, s10 2331; GFX10-NEXT: s_mov_b32 s9, s11 2332; GFX10-NEXT: s_mov_b32 s10, s12 2333; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 2334; 
GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo 2335; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2336; GFX10-NEXT: s_mov_b32 s11, s13 2337; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2338; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2339; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2340; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2341; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2342; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2343; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s10, vcc_lo 2344; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s11, vcc_lo 2345; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2346; GFX10-NEXT: v_readfirstlane_b32 s1, v1 2347; GFX10-NEXT: ; return to shader part epilog 2348entry: 2349 %ext = extractelement <6 x double> %vec, i32 %sel 2350 ret double %ext 2351} 2352 2353define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) { 2354; GCN-LABEL: dyn_extract_v6f64_v_v: 2355; GCN: ; %bb.0: ; %entry 2356; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2357; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12 2358; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2359; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2360; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12 2361; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2362; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2363; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12 2364; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 2365; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2366; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12 2367; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 2368; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2369; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v12 2370; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 2371; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 2372; GCN-NEXT: s_setpc_b64 s[30:31] 2373; 2374; GFX10-LABEL: dyn_extract_v6f64_v_v: 2375; GFX10: ; %bb.0: ; %entry 2376; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2377; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2378; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 2379; 
GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2380; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2381; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 2382; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2383; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 2384; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12 2385; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 2386; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 2387; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12 2388; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 2389; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 2390; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12 2391; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 2392; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 2393; GFX10-NEXT: s_setpc_b64 s[30:31] 2394entry: 2395 %ext = extractelement <6 x double> %vec, i32 %sel 2396 ret double %ext 2397} 2398 2399define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) { 2400; GPRIDX-LABEL: dyn_extract_v6f64_v_s: 2401; GPRIDX: ; %bb.0: ; %entry 2402; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 2403; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) 2404; GPRIDX-NEXT: v_mov_b32_e32 v12, v0 2405; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 2406; GPRIDX-NEXT: s_set_gpr_idx_off 2407; GPRIDX-NEXT: v_readfirstlane_b32 s0, v12 2408; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 2409; GPRIDX-NEXT: ; return to shader part epilog 2410; 2411; MOVREL-LABEL: dyn_extract_v6f64_v_s: 2412; MOVREL: ; %bb.0: ; %entry 2413; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 2414; MOVREL-NEXT: v_movrels_b32_e32 v12, v0 2415; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 2416; MOVREL-NEXT: v_readfirstlane_b32 s0, v12 2417; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 2418; MOVREL-NEXT: ; return to shader part epilog 2419; 2420; GFX10-LABEL: dyn_extract_v6f64_v_s: 2421; GFX10: ; %bb.0: ; %entry 2422; GFX10-NEXT: s_lshl_b32 m0, s2, 1 2423; GFX10-NEXT: v_movrels_b32_e32 v12, v0 2424; GFX10-NEXT: v_movrels_b32_e32 v0, v1 2425; GFX10-NEXT: v_readfirstlane_b32 
s0, v12 2426; GFX10-NEXT: v_readfirstlane_b32 s1, v0 2427; GFX10-NEXT: ; return to shader part epilog 2428entry: 2429 %ext = extractelement <6 x double> %vec, i32 %sel 2430 ret double %ext 2431} 2432 2433define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) { 2434; GCN-LABEL: dyn_extract_v6f64_s_s: 2435; GCN: ; %bb.0: ; %entry 2436; GCN-NEXT: s_mov_b32 s0, s2 2437; GCN-NEXT: s_mov_b32 s1, s3 2438; GCN-NEXT: s_mov_b32 m0, s14 2439; GCN-NEXT: s_mov_b32 s2, s4 2440; GCN-NEXT: s_mov_b32 s3, s5 2441; GCN-NEXT: s_mov_b32 s4, s6 2442; GCN-NEXT: s_mov_b32 s5, s7 2443; GCN-NEXT: s_mov_b32 s6, s8 2444; GCN-NEXT: s_mov_b32 s7, s9 2445; GCN-NEXT: s_mov_b32 s8, s10 2446; GCN-NEXT: s_mov_b32 s9, s11 2447; GCN-NEXT: s_mov_b32 s10, s12 2448; GCN-NEXT: s_mov_b32 s11, s13 2449; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] 2450; GCN-NEXT: ; return to shader part epilog 2451; 2452; GFX10-LABEL: dyn_extract_v6f64_s_s: 2453; GFX10: ; %bb.0: ; %entry 2454; GFX10-NEXT: s_mov_b32 s0, s2 2455; GFX10-NEXT: s_mov_b32 s1, s3 2456; GFX10-NEXT: s_mov_b32 m0, s14 2457; GFX10-NEXT: s_mov_b32 s2, s4 2458; GFX10-NEXT: s_mov_b32 s3, s5 2459; GFX10-NEXT: s_mov_b32 s4, s6 2460; GFX10-NEXT: s_mov_b32 s5, s7 2461; GFX10-NEXT: s_mov_b32 s6, s8 2462; GFX10-NEXT: s_mov_b32 s7, s9 2463; GFX10-NEXT: s_mov_b32 s8, s10 2464; GFX10-NEXT: s_mov_b32 s9, s11 2465; GFX10-NEXT: s_mov_b32 s10, s12 2466; GFX10-NEXT: s_mov_b32 s11, s13 2467; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] 2468; GFX10-NEXT: ; return to shader part epilog 2469entry: 2470 %ext = extractelement <6 x double> %vec, i32 %sel 2471 ret double %ext 2472} 2473 2474define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) { 2475; GCN-LABEL: dyn_extract_v7f64_s_v: 2476; GCN: ; %bb.0: ; %entry 2477; GCN-NEXT: s_mov_b32 s0, s2 2478; GCN-NEXT: s_mov_b32 s1, s3 2479; GCN-NEXT: s_mov_b32 s2, s4 2480; GCN-NEXT: s_mov_b32 s3, s5 2481; GCN-NEXT: s_mov_b32 s4, s6 2482; GCN-NEXT: s_mov_b32 s5, s7 2483; GCN-NEXT: 
v_mov_b32_e32 v1, s0 2484; GCN-NEXT: v_mov_b32_e32 v2, s1 2485; GCN-NEXT: v_mov_b32_e32 v3, s2 2486; GCN-NEXT: v_mov_b32_e32 v4, s3 2487; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2488; GCN-NEXT: s_mov_b32 s6, s8 2489; GCN-NEXT: s_mov_b32 s7, s9 2490; GCN-NEXT: v_mov_b32_e32 v5, s4 2491; GCN-NEXT: v_mov_b32_e32 v6, s5 2492; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2493; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2494; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 2495; GCN-NEXT: s_mov_b32 s8, s10 2496; GCN-NEXT: s_mov_b32 s9, s11 2497; GCN-NEXT: v_mov_b32_e32 v7, s6 2498; GCN-NEXT: v_mov_b32_e32 v8, s7 2499; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2500; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 2501; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 2502; GCN-NEXT: v_mov_b32_e32 v9, s8 2503; GCN-NEXT: v_mov_b32_e32 v10, s9 2504; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2505; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 2506; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 2507; GCN-NEXT: v_mov_b32_e32 v11, s12 2508; GCN-NEXT: v_mov_b32_e32 v12, s13 2509; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2510; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 2511; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2512; GCN-NEXT: v_mov_b32_e32 v13, s14 2513; GCN-NEXT: v_mov_b32_e32 v14, s15 2514; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 2515; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc 2516; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 2517; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v13, vcc 2518; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v14, vcc 2519; GCN-NEXT: v_readfirstlane_b32 s0, v0 2520; GCN-NEXT: v_readfirstlane_b32 s1, v1 2521; GCN-NEXT: ; return to shader part epilog 2522; 2523; GFX10-LABEL: dyn_extract_v7f64_s_v: 2524; GFX10: ; %bb.0: ; %entry 2525; GFX10-NEXT: s_mov_b32 s0, s2 2526; GFX10-NEXT: s_mov_b32 s2, s4 2527; GFX10-NEXT: s_mov_b32 s19, s5 2528; GFX10-NEXT: v_mov_b32_e32 v1, s2 2529; GFX10-NEXT: v_mov_b32_e32 v2, s19 2530; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2531; GFX10-NEXT: s_mov_b32 s1, s3 
2532; GFX10-NEXT: s_mov_b32 s4, s6 2533; GFX10-NEXT: s_mov_b32 s5, s7 2534; GFX10-NEXT: s_mov_b32 s6, s8 2535; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 2536; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo 2537; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2538; GFX10-NEXT: s_mov_b32 s7, s9 2539; GFX10-NEXT: s_mov_b32 s8, s10 2540; GFX10-NEXT: s_mov_b32 s9, s11 2541; GFX10-NEXT: s_mov_b32 s10, s12 2542; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 2543; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo 2544; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2545; GFX10-NEXT: s_mov_b32 s11, s13 2546; GFX10-NEXT: s_mov_b32 s12, s14 2547; GFX10-NEXT: s_mov_b32 s13, s15 2548; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2549; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2550; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2551; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2552; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2553; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2554; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 2555; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo 2556; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 2557; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo 2558; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo 2559; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2560; GFX10-NEXT: v_readfirstlane_b32 s1, v1 2561; GFX10-NEXT: ; return to shader part epilog 2562entry: 2563 %ext = extractelement <7 x double> %vec, i32 %sel 2564 ret double %ext 2565} 2566 2567define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) { 2568; GCN-LABEL: dyn_extract_v7f64_v_v: 2569; GCN: ; %bb.0: ; %entry 2570; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2571; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v14 2572; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2573; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2574; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v14 2575; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2576; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2577; 
GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v14 2578; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 2579; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2580; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v14 2581; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 2582; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2583; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v14 2584; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 2585; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 2586; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v14 2587; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 2588; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 2589; GCN-NEXT: s_setpc_b64 s[30:31] 2590; 2591; GFX10-LABEL: dyn_extract_v7f64_v_v: 2592; GFX10: ; %bb.0: ; %entry 2593; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2594; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2595; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 2596; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2597; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2598; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14 2599; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2600; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 2601; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14 2602; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 2603; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 2604; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14 2605; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 2606; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 2607; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14 2608; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 2609; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 2610; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14 2611; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 2612; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 2613; GFX10-NEXT: s_setpc_b64 s[30:31] 2614entry: 2615 %ext = extractelement <7 x double> %vec, i32 %sel 2616 ret double %ext 2617} 2618 2619define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) { 2620; 
GPRIDX-LABEL: dyn_extract_v7f64_v_s: 2621; GPRIDX: ; %bb.0: ; %entry 2622; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 2623; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) 2624; GPRIDX-NEXT: v_mov_b32_e32 v14, v0 2625; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 2626; GPRIDX-NEXT: s_set_gpr_idx_off 2627; GPRIDX-NEXT: v_readfirstlane_b32 s0, v14 2628; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 2629; GPRIDX-NEXT: ; return to shader part epilog 2630; 2631; MOVREL-LABEL: dyn_extract_v7f64_v_s: 2632; MOVREL: ; %bb.0: ; %entry 2633; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 2634; MOVREL-NEXT: v_movrels_b32_e32 v14, v0 2635; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 2636; MOVREL-NEXT: v_readfirstlane_b32 s0, v14 2637; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 2638; MOVREL-NEXT: ; return to shader part epilog 2639; 2640; GFX10-LABEL: dyn_extract_v7f64_v_s: 2641; GFX10: ; %bb.0: ; %entry 2642; GFX10-NEXT: s_lshl_b32 m0, s2, 1 2643; GFX10-NEXT: v_movrels_b32_e32 v14, v0 2644; GFX10-NEXT: v_movrels_b32_e32 v0, v1 2645; GFX10-NEXT: v_readfirstlane_b32 s0, v14 2646; GFX10-NEXT: v_readfirstlane_b32 s1, v0 2647; GFX10-NEXT: ; return to shader part epilog 2648entry: 2649 %ext = extractelement <7 x double> %vec, i32 %sel 2650 ret double %ext 2651} 2652 2653define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) { 2654; GCN-LABEL: dyn_extract_v7f64_s_s: 2655; GCN: ; %bb.0: ; %entry 2656; GCN-NEXT: s_mov_b32 s0, s2 2657; GCN-NEXT: s_mov_b32 s1, s3 2658; GCN-NEXT: s_mov_b32 m0, s16 2659; GCN-NEXT: s_mov_b32 s2, s4 2660; GCN-NEXT: s_mov_b32 s3, s5 2661; GCN-NEXT: s_mov_b32 s4, s6 2662; GCN-NEXT: s_mov_b32 s5, s7 2663; GCN-NEXT: s_mov_b32 s6, s8 2664; GCN-NEXT: s_mov_b32 s7, s9 2665; GCN-NEXT: s_mov_b32 s8, s10 2666; GCN-NEXT: s_mov_b32 s9, s11 2667; GCN-NEXT: s_mov_b32 s10, s12 2668; GCN-NEXT: s_mov_b32 s11, s13 2669; GCN-NEXT: s_mov_b32 s12, s14 2670; GCN-NEXT: s_mov_b32 s13, s15 2671; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] 2672; GCN-NEXT: ; return to shader part epilog 2673; 2674; 
GFX10-LABEL: dyn_extract_v7f64_s_s: 2675; GFX10: ; %bb.0: ; %entry 2676; GFX10-NEXT: s_mov_b32 s0, s2 2677; GFX10-NEXT: s_mov_b32 s1, s3 2678; GFX10-NEXT: s_mov_b32 m0, s16 2679; GFX10-NEXT: s_mov_b32 s2, s4 2680; GFX10-NEXT: s_mov_b32 s3, s5 2681; GFX10-NEXT: s_mov_b32 s4, s6 2682; GFX10-NEXT: s_mov_b32 s5, s7 2683; GFX10-NEXT: s_mov_b32 s6, s8 2684; GFX10-NEXT: s_mov_b32 s7, s9 2685; GFX10-NEXT: s_mov_b32 s8, s10 2686; GFX10-NEXT: s_mov_b32 s9, s11 2687; GFX10-NEXT: s_mov_b32 s10, s12 2688; GFX10-NEXT: s_mov_b32 s11, s13 2689; GFX10-NEXT: s_mov_b32 s12, s14 2690; GFX10-NEXT: s_mov_b32 s13, s15 2691; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] 2692; GFX10-NEXT: ; return to shader part epilog 2693entry: 2694 %ext = extractelement <7 x double> %vec, i32 %sel 2695 ret double %ext 2696} 2697 2698define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 %sel) { 2699; GPRIDX-LABEL: dyn_extract_v5f64_s_s: 2700; GPRIDX: .amd_kernel_code_t 2701; GPRIDX-NEXT: amd_code_version_major = 1 2702; GPRIDX-NEXT: amd_code_version_minor = 2 2703; GPRIDX-NEXT: amd_machine_kind = 1 2704; GPRIDX-NEXT: amd_machine_version_major = 9 2705; GPRIDX-NEXT: amd_machine_version_minor = 0 2706; GPRIDX-NEXT: amd_machine_version_stepping = 0 2707; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 2708; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 2709; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 2710; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1 2711; GPRIDX-NEXT: priority = 0 2712; GPRIDX-NEXT: float_mode = 240 2713; GPRIDX-NEXT: priv = 0 2714; GPRIDX-NEXT: enable_dx10_clamp = 1 2715; GPRIDX-NEXT: debug_mode = 0 2716; GPRIDX-NEXT: enable_ieee_mode = 1 2717; GPRIDX-NEXT: enable_wgp_mode = 0 2718; GPRIDX-NEXT: enable_mem_ordered = 0 2719; GPRIDX-NEXT: enable_fwd_progress = 0 2720; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 2721; GPRIDX-NEXT: user_sgpr_count = 6 2722; GPRIDX-NEXT: enable_trap_handler = 0 2723; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 
2724; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 2725; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 2726; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 2727; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 2728; GPRIDX-NEXT: enable_exception_msb = 0 2729; GPRIDX-NEXT: granulated_lds_size = 0 2730; GPRIDX-NEXT: enable_exception = 0 2731; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 2732; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 2733; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 2734; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 2735; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 2736; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 2737; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 2738; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 2739; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 2740; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 2741; GPRIDX-NEXT: enable_wavefront_size32 = 0 2742; GPRIDX-NEXT: enable_ordered_append_gds = 0 2743; GPRIDX-NEXT: private_element_size = 1 2744; GPRIDX-NEXT: is_ptr64 = 1 2745; GPRIDX-NEXT: is_dynamic_callstack = 0 2746; GPRIDX-NEXT: is_debug_enabled = 0 2747; GPRIDX-NEXT: is_xnack_enabled = 1 2748; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 2749; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 2750; GPRIDX-NEXT: gds_segment_byte_size = 0 2751; GPRIDX-NEXT: kernarg_segment_byte_size = 12 2752; GPRIDX-NEXT: workgroup_fbarrier_count = 0 2753; GPRIDX-NEXT: wavefront_sgpr_count = 9 2754; GPRIDX-NEXT: workitem_vgpr_count = 3 2755; GPRIDX-NEXT: reserved_vgpr_first = 0 2756; GPRIDX-NEXT: reserved_vgpr_count = 0 2757; GPRIDX-NEXT: reserved_sgpr_first = 0 2758; GPRIDX-NEXT: reserved_sgpr_count = 0 2759; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 2760; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 2761; GPRIDX-NEXT: kernarg_segment_alignment = 4 2762; GPRIDX-NEXT: group_segment_alignment = 4 2763; GPRIDX-NEXT: private_segment_alignment = 4 2764; GPRIDX-NEXT: wavefront_size = 6 2765; GPRIDX-NEXT: 
call_convention = -1 2766; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 2767; GPRIDX-NEXT: .end_amd_kernel_code_t 2768; GPRIDX-NEXT: ; %bb.0: ; %entry 2769; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2770; GPRIDX-NEXT: s_load_dword s8, s[4:5], 0x8 2771; GPRIDX-NEXT: s_mov_b32 s2, 0 2772; GPRIDX-NEXT: s_mov_b32 s3, 0x40140000 2773; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000 2774; GPRIDX-NEXT: s_mov_b32 s4, s2 2775; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 2776; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1 2777; GPRIDX-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 2778; GPRIDX-NEXT: s_cmp_eq_u32 s8, 2 2779; GPRIDX-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 2780; GPRIDX-NEXT: s_cmp_eq_u32 s8, 3 2781; GPRIDX-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] 2782; GPRIDX-NEXT: s_cmp_eq_u32 s8, 4 2783; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 2784; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 2785; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 2786; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 2787; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 2788; GPRIDX-NEXT: s_endpgm 2789; 2790; MOVREL-LABEL: dyn_extract_v5f64_s_s: 2791; MOVREL: .amd_kernel_code_t 2792; MOVREL-NEXT: amd_code_version_major = 1 2793; MOVREL-NEXT: amd_code_version_minor = 2 2794; MOVREL-NEXT: amd_machine_kind = 1 2795; MOVREL-NEXT: amd_machine_version_major = 8 2796; MOVREL-NEXT: amd_machine_version_minor = 0 2797; MOVREL-NEXT: amd_machine_version_stepping = 3 2798; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 2799; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 2800; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 2801; MOVREL-NEXT: granulated_wavefront_sgpr_count = 1 2802; MOVREL-NEXT: priority = 0 2803; MOVREL-NEXT: float_mode = 240 2804; MOVREL-NEXT: priv = 0 2805; MOVREL-NEXT: enable_dx10_clamp = 1 2806; MOVREL-NEXT: debug_mode = 0 2807; MOVREL-NEXT: enable_ieee_mode = 1 2808; MOVREL-NEXT: enable_wgp_mode = 0 2809; MOVREL-NEXT: enable_mem_ordered = 0 2810; MOVREL-NEXT: enable_fwd_progress = 0 2811; MOVREL-NEXT: 
enable_sgpr_private_segment_wave_byte_offset = 0 2812; MOVREL-NEXT: user_sgpr_count = 6 2813; MOVREL-NEXT: enable_trap_handler = 0 2814; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 2815; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 2816; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 2817; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 2818; MOVREL-NEXT: enable_vgpr_workitem_id = 0 2819; MOVREL-NEXT: enable_exception_msb = 0 2820; MOVREL-NEXT: granulated_lds_size = 0 2821; MOVREL-NEXT: enable_exception = 0 2822; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 2823; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 2824; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 2825; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 2826; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 2827; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 2828; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 2829; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 2830; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 2831; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 2832; MOVREL-NEXT: enable_wavefront_size32 = 0 2833; MOVREL-NEXT: enable_ordered_append_gds = 0 2834; MOVREL-NEXT: private_element_size = 1 2835; MOVREL-NEXT: is_ptr64 = 1 2836; MOVREL-NEXT: is_dynamic_callstack = 0 2837; MOVREL-NEXT: is_debug_enabled = 0 2838; MOVREL-NEXT: is_xnack_enabled = 0 2839; MOVREL-NEXT: workitem_private_segment_byte_size = 0 2840; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 2841; MOVREL-NEXT: gds_segment_byte_size = 0 2842; MOVREL-NEXT: kernarg_segment_byte_size = 12 2843; MOVREL-NEXT: workgroup_fbarrier_count = 0 2844; MOVREL-NEXT: wavefront_sgpr_count = 9 2845; MOVREL-NEXT: workitem_vgpr_count = 4 2846; MOVREL-NEXT: reserved_vgpr_first = 0 2847; MOVREL-NEXT: reserved_vgpr_count = 0 2848; MOVREL-NEXT: reserved_sgpr_first = 0 2849; MOVREL-NEXT: reserved_sgpr_count = 0 2850; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 2851; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 2852; MOVREL-NEXT: 
kernarg_segment_alignment = 4 2853; MOVREL-NEXT: group_segment_alignment = 4 2854; MOVREL-NEXT: private_segment_alignment = 4 2855; MOVREL-NEXT: wavefront_size = 6 2856; MOVREL-NEXT: call_convention = -1 2857; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 2858; MOVREL-NEXT: .end_amd_kernel_code_t 2859; MOVREL-NEXT: ; %bb.0: ; %entry 2860; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2861; MOVREL-NEXT: s_load_dword s8, s[4:5], 0x8 2862; MOVREL-NEXT: s_mov_b32 s2, 0 2863; MOVREL-NEXT: s_mov_b32 s3, 0x40140000 2864; MOVREL-NEXT: s_mov_b32 s5, 0x40080000 2865; MOVREL-NEXT: s_mov_b32 s4, s2 2866; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 2867; MOVREL-NEXT: s_cmp_eq_u32 s8, 1 2868; MOVREL-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 2869; MOVREL-NEXT: s_cmp_eq_u32 s8, 2 2870; MOVREL-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 2871; MOVREL-NEXT: s_cmp_eq_u32 s8, 3 2872; MOVREL-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] 2873; MOVREL-NEXT: s_cmp_eq_u32 s8, 4 2874; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 2875; MOVREL-NEXT: v_mov_b32_e32 v0, s2 2876; MOVREL-NEXT: v_mov_b32_e32 v3, s1 2877; MOVREL-NEXT: v_mov_b32_e32 v1, s3 2878; MOVREL-NEXT: v_mov_b32_e32 v2, s0 2879; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 2880; MOVREL-NEXT: s_endpgm 2881; 2882; GFX10-LABEL: dyn_extract_v5f64_s_s: 2883; GFX10: .amd_kernel_code_t 2884; GFX10-NEXT: amd_code_version_major = 1 2885; GFX10-NEXT: amd_code_version_minor = 2 2886; GFX10-NEXT: amd_machine_kind = 1 2887; GFX10-NEXT: amd_machine_version_major = 10 2888; GFX10-NEXT: amd_machine_version_minor = 1 2889; GFX10-NEXT: amd_machine_version_stepping = 0 2890; GFX10-NEXT: kernel_code_entry_byte_offset = 256 2891; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 2892; GFX10-NEXT: granulated_workitem_vgpr_count = 0 2893; GFX10-NEXT: granulated_wavefront_sgpr_count = 1 2894; GFX10-NEXT: priority = 0 2895; GFX10-NEXT: float_mode = 240 2896; GFX10-NEXT: priv = 0 2897; GFX10-NEXT: enable_dx10_clamp = 1 2898; GFX10-NEXT: debug_mode = 0 2899; GFX10-NEXT: 
enable_ieee_mode = 1 2900; GFX10-NEXT: enable_wgp_mode = 1 2901; GFX10-NEXT: enable_mem_ordered = 1 2902; GFX10-NEXT: enable_fwd_progress = 0 2903; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 2904; GFX10-NEXT: user_sgpr_count = 6 2905; GFX10-NEXT: enable_trap_handler = 0 2906; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 2907; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0 2908; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0 2909; GFX10-NEXT: enable_sgpr_workgroup_info = 0 2910; GFX10-NEXT: enable_vgpr_workitem_id = 0 2911; GFX10-NEXT: enable_exception_msb = 0 2912; GFX10-NEXT: granulated_lds_size = 0 2913; GFX10-NEXT: enable_exception = 0 2914; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 2915; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0 2916; GFX10-NEXT: enable_sgpr_queue_ptr = 0 2917; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 2918; GFX10-NEXT: enable_sgpr_dispatch_id = 0 2919; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 2920; GFX10-NEXT: enable_sgpr_private_segment_size = 0 2921; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 2922; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 2923; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 2924; GFX10-NEXT: enable_wavefront_size32 = 1 2925; GFX10-NEXT: enable_ordered_append_gds = 0 2926; GFX10-NEXT: private_element_size = 1 2927; GFX10-NEXT: is_ptr64 = 1 2928; GFX10-NEXT: is_dynamic_callstack = 0 2929; GFX10-NEXT: is_debug_enabled = 0 2930; GFX10-NEXT: is_xnack_enabled = 1 2931; GFX10-NEXT: workitem_private_segment_byte_size = 0 2932; GFX10-NEXT: workgroup_group_segment_byte_size = 0 2933; GFX10-NEXT: gds_segment_byte_size = 0 2934; GFX10-NEXT: kernarg_segment_byte_size = 12 2935; GFX10-NEXT: workgroup_fbarrier_count = 0 2936; GFX10-NEXT: wavefront_sgpr_count = 9 2937; GFX10-NEXT: workitem_vgpr_count = 3 2938; GFX10-NEXT: reserved_vgpr_first = 0 2939; GFX10-NEXT: reserved_vgpr_count = 0 2940; GFX10-NEXT: reserved_sgpr_first = 0 2941; GFX10-NEXT: reserved_sgpr_count = 0 2942; GFX10-NEXT: 
debug_wavefront_private_segment_offset_sgpr = 0 2943; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 2944; GFX10-NEXT: kernarg_segment_alignment = 4 2945; GFX10-NEXT: group_segment_alignment = 4 2946; GFX10-NEXT: private_segment_alignment = 4 2947; GFX10-NEXT: wavefront_size = 5 2948; GFX10-NEXT: call_convention = -1 2949; GFX10-NEXT: runtime_loader_kernel_symbol = 0 2950; GFX10-NEXT: .end_amd_kernel_code_t 2951; GFX10-NEXT: ; %bb.0: ; %entry 2952; GFX10-NEXT: s_clause 0x1 2953; GFX10-NEXT: s_load_dword s8, s[4:5], 0x8 2954; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2955; GFX10-NEXT: s_mov_b32 s2, 0 2956; GFX10-NEXT: s_mov_b32 s3, 0x40140000 2957; GFX10-NEXT: s_mov_b32 s5, 0x40080000 2958; GFX10-NEXT: s_mov_b32 s4, s2 2959; GFX10-NEXT: v_mov_b32_e32 v2, 0 2960; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2961; GFX10-NEXT: s_cmp_eq_u32 s8, 1 2962; GFX10-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 2963; GFX10-NEXT: s_cmp_eq_u32 s8, 2 2964; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 2965; GFX10-NEXT: s_cmp_eq_u32 s8, 3 2966; GFX10-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] 2967; GFX10-NEXT: s_cmp_eq_u32 s8, 4 2968; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 2969; GFX10-NEXT: v_mov_b32_e32 v0, s2 2970; GFX10-NEXT: v_mov_b32_e32 v1, s3 2971; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 2972; GFX10-NEXT: s_endpgm 2973entry: 2974 %ext = extractelement <5 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, i32 %sel 2975 store double %ext, double addrspace(1)* %out 2976 ret void 2977} 2978 2979define float @dyn_extract_v15f32_const_s_v(i32 %sel) { 2980; GCN-LABEL: dyn_extract_v15f32_const_s_v: 2981; GCN: ; %bb.0: ; %entry 2982; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2983; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2984; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000 2985; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc 2986; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 2987; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc 2988; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 
3, v0 2989; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 2990; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc 2991; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 2992; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 2993; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2994; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2995; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000 2996; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2997; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 2998; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000 2999; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 3000; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 3001; GCN-NEXT: v_mov_b32_e32 v6, 0x41100000 3002; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 3003; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 3004; GCN-NEXT: v_mov_b32_e32 v7, 0x41200000 3005; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 3006; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 3007; GCN-NEXT: v_mov_b32_e32 v8, 0x41300000 3008; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 3009; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 3010; GCN-NEXT: v_mov_b32_e32 v9, 0x41400000 3011; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 3012; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 3013; GCN-NEXT: v_mov_b32_e32 v10, 0x41500000 3014; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 3015; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 3016; GCN-NEXT: v_mov_b32_e32 v11, 0x41600000 3017; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 3018; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 3019; GCN-NEXT: v_mov_b32_e32 v12, 0x41700000 3020; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 3021; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 3022; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v12, vcc 3023; GCN-NEXT: s_setpc_b64 s[30:31] 3024; 3025; GFX10-LABEL: dyn_extract_v15f32_const_s_v: 3026; GFX10: ; %bb.0: ; %entry 3027; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3028; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3029; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3030; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo 3031; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 3032; GFX10-NEXT: 
v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo 3033; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 3034; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo 3035; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 3036; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo 3037; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 3038; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo 3039; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 3040; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo 3041; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 3042; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo 3043; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 3044; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo 3045; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 3046; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo 3047; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 3048; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo 3049; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 3050; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo 3051; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 3052; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo 3053; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 3054; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo 3055; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 3056; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, 0x41700000, vcc_lo 3057; GFX10-NEXT: s_setpc_b64 s[30:31] 3058entry: 3059 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel 3060 ret float %ext 3061} 3062 3063define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) { 3064; GCN-LABEL: dyn_extract_v15f32_const_s_s: 3065; GCN: ; %bb.0: ; %entry 3066; GCN-NEXT: s_mov_b32 s4, 1.0 3067; GCN-NEXT: s_mov_b32 m0, s2 3068; GCN-NEXT: s_mov_b32 s18, 0x41700000 3069; GCN-NEXT: s_mov_b32 s17, 0x41600000 3070; 
GCN-NEXT: s_mov_b32 s16, 0x41500000 3071; GCN-NEXT: s_mov_b32 s15, 0x41400000 3072; GCN-NEXT: s_mov_b32 s14, 0x41300000 3073; GCN-NEXT: s_mov_b32 s13, 0x41200000 3074; GCN-NEXT: s_mov_b32 s12, 0x41100000 3075; GCN-NEXT: s_mov_b32 s11, 0x41000000 3076; GCN-NEXT: s_mov_b32 s10, 0x40e00000 3077; GCN-NEXT: s_mov_b32 s9, 0x40c00000 3078; GCN-NEXT: s_mov_b32 s8, 0x40a00000 3079; GCN-NEXT: s_mov_b32 s7, 4.0 3080; GCN-NEXT: s_mov_b32 s6, 0x40400000 3081; GCN-NEXT: s_mov_b32 s5, 2.0 3082; GCN-NEXT: s_movrels_b32 s0, s4 3083; GCN-NEXT: v_mov_b32_e32 v0, s0 3084; GCN-NEXT: ; return to shader part epilog 3085; 3086; GFX10-LABEL: dyn_extract_v15f32_const_s_s: 3087; GFX10: ; %bb.0: ; %entry 3088; GFX10-NEXT: s_mov_b32 s4, 1.0 3089; GFX10-NEXT: s_mov_b32 m0, s2 3090; GFX10-NEXT: s_mov_b32 s18, 0x41700000 3091; GFX10-NEXT: s_mov_b32 s17, 0x41600000 3092; GFX10-NEXT: s_mov_b32 s16, 0x41500000 3093; GFX10-NEXT: s_mov_b32 s15, 0x41400000 3094; GFX10-NEXT: s_mov_b32 s14, 0x41300000 3095; GFX10-NEXT: s_mov_b32 s13, 0x41200000 3096; GFX10-NEXT: s_mov_b32 s12, 0x41100000 3097; GFX10-NEXT: s_mov_b32 s11, 0x41000000 3098; GFX10-NEXT: s_mov_b32 s10, 0x40e00000 3099; GFX10-NEXT: s_mov_b32 s9, 0x40c00000 3100; GFX10-NEXT: s_mov_b32 s8, 0x40a00000 3101; GFX10-NEXT: s_mov_b32 s7, 4.0 3102; GFX10-NEXT: s_mov_b32 s6, 0x40400000 3103; GFX10-NEXT: s_mov_b32 s5, 2.0 3104; GFX10-NEXT: s_movrels_b32 s0, s4 3105; GFX10-NEXT: v_mov_b32_e32 v0, s0 3106; GFX10-NEXT: ; return to shader part epilog 3107entry: 3108 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel 3109 ret float %ext 3110} 3111 3112define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) { 3113; GCN-LABEL: dyn_extract_v15f32_s_v: 3114; GCN: ; %bb.0: ; %entry 3115; GCN-NEXT: s_mov_b32 s0, s2 3116; GCN-NEXT: s_mov_b32 s1, s3 3117; GCN-NEXT: s_mov_b32 s2, 
s4 3118; GCN-NEXT: v_mov_b32_e32 v1, s0 3119; GCN-NEXT: v_mov_b32_e32 v2, s1 3120; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3121; GCN-NEXT: s_mov_b32 s3, s5 3122; GCN-NEXT: v_mov_b32_e32 v3, s2 3123; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3124; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 3125; GCN-NEXT: s_mov_b32 s4, s6 3126; GCN-NEXT: v_mov_b32_e32 v4, s3 3127; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 3128; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 3129; GCN-NEXT: s_mov_b32 s5, s7 3130; GCN-NEXT: v_mov_b32_e32 v5, s4 3131; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 3132; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 3133; GCN-NEXT: s_mov_b32 s6, s8 3134; GCN-NEXT: v_mov_b32_e32 v6, s5 3135; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 3136; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 3137; GCN-NEXT: s_mov_b32 s7, s9 3138; GCN-NEXT: v_mov_b32_e32 v7, s6 3139; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 3140; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 3141; GCN-NEXT: s_mov_b32 s8, s10 3142; GCN-NEXT: v_mov_b32_e32 v8, s7 3143; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 3144; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 3145; GCN-NEXT: s_mov_b32 s9, s11 3146; GCN-NEXT: v_mov_b32_e32 v9, s8 3147; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 3148; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 3149; GCN-NEXT: s_mov_b32 s10, s12 3150; GCN-NEXT: v_mov_b32_e32 v10, s9 3151; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 3152; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 3153; GCN-NEXT: v_mov_b32_e32 v11, s10 3154; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 3155; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 3156; GCN-NEXT: v_mov_b32_e32 v12, s13 3157; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 3158; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 3159; GCN-NEXT: v_mov_b32_e32 v13, s14 3160; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc 3161; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 3162; GCN-NEXT: v_mov_b32_e32 v14, s15 3163; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 3164; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 3165; GCN-NEXT: 
v_mov_b32_e32 v15, s16 3166; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc 3167; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 3168; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc 3169; GCN-NEXT: ; return to shader part epilog 3170; 3171; GFX10-LABEL: dyn_extract_v15f32_s_v: 3172; GFX10: ; %bb.0: ; %entry 3173; GFX10-NEXT: s_mov_b32 s1, s3 3174; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3175; GFX10-NEXT: v_mov_b32_e32 v1, s1 3176; GFX10-NEXT: s_mov_b32 s0, s2 3177; GFX10-NEXT: s_mov_b32 s2, s4 3178; GFX10-NEXT: s_mov_b32 s3, s5 3179; GFX10-NEXT: s_mov_b32 s4, s6 3180; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 3181; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 3182; GFX10-NEXT: s_mov_b32 s5, s7 3183; GFX10-NEXT: s_mov_b32 s6, s8 3184; GFX10-NEXT: s_mov_b32 s7, s9 3185; GFX10-NEXT: s_mov_b32 s8, s10 3186; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo 3187; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 3188; GFX10-NEXT: s_mov_b32 s9, s11 3189; GFX10-NEXT: s_mov_b32 s10, s12 3190; GFX10-NEXT: s_mov_b32 s11, s13 3191; GFX10-NEXT: s_mov_b32 s12, s14 3192; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo 3193; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 3194; GFX10-NEXT: s_mov_b32 s13, s15 3195; GFX10-NEXT: s_mov_b32 s14, s16 3196; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 3197; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 3198; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo 3199; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 3200; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 3201; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 3202; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo 3203; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 3204; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 3205; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 3206; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s9, vcc_lo 3207; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 3208; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 3209; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 3210; GFX10-NEXT: 
v_cndmask_b32_e64 v1, v1, s11, vcc_lo 3211; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 3212; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo 3213; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 3214; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s13, vcc_lo 3215; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 3216; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo 3217; GFX10-NEXT: ; return to shader part epilog 3218entry: 3219 %ext = extractelement <15 x float> %vec, i32 %sel 3220 ret float %ext 3221} 3222 3223define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) { 3224; GCN-LABEL: dyn_extract_v15f32_v_v: 3225; GCN: ; %bb.0: ; %entry 3226; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3227; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3228; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3229; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3230; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3231; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 3232; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3233; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 3234; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3235; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3236; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3237; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 3238; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3239; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3240; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3241; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3242; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3243; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3244; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3245; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3246; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3247; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3248; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3249; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3250; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3251; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3252; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3253; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3254; 
GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 3255; GCN-NEXT: s_setpc_b64 s[30:31] 3256; 3257; GFX10-LABEL: dyn_extract_v15f32_v_v: 3258; GFX10: ; %bb.0: ; %entry 3259; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3260; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3261; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 3262; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3263; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 3264; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3265; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 3266; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 3267; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 3268; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 3269; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 3270; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 3271; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 3272; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 3273; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 3274; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3275; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 3276; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 3277; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 3278; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo 3279; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 3280; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 3281; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 3282; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo 3283; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 3284; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 3285; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 3286; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo 3287; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 3288; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 3289; GFX10-NEXT: s_setpc_b64 s[30:31] 3290entry: 3291 %ext = extractelement <15 x float> %vec, i32 %sel 3292 ret float %ext 3293} 3294 3295define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) { 3296; GPRIDX-LABEL: 
dyn_extract_v15f32_v_s: 3297; GPRIDX: ; %bb.0: ; %entry 3298; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) 3299; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 3300; GPRIDX-NEXT: s_set_gpr_idx_off 3301; GPRIDX-NEXT: ; return to shader part epilog 3302; 3303; MOVREL-LABEL: dyn_extract_v15f32_v_s: 3304; MOVREL: ; %bb.0: ; %entry 3305; MOVREL-NEXT: s_mov_b32 m0, s2 3306; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 3307; MOVREL-NEXT: ; return to shader part epilog 3308; 3309; GFX10-LABEL: dyn_extract_v15f32_v_s: 3310; GFX10: ; %bb.0: ; %entry 3311; GFX10-NEXT: s_mov_b32 m0, s2 3312; GFX10-NEXT: v_movrels_b32_e32 v0, v0 3313; GFX10-NEXT: ; return to shader part epilog 3314entry: 3315 %ext = extractelement <15 x float> %vec, i32 %sel 3316 ret float %ext 3317} 3318 3319define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) { 3320; GCN-LABEL: dyn_extract_v15f32_s_s: 3321; GCN: ; %bb.0: ; %entry 3322; GCN-NEXT: s_mov_b32 s0, s2 3323; GCN-NEXT: s_mov_b32 m0, s17 3324; GCN-NEXT: s_mov_b32 s1, s3 3325; GCN-NEXT: s_mov_b32 s2, s4 3326; GCN-NEXT: s_mov_b32 s3, s5 3327; GCN-NEXT: s_mov_b32 s4, s6 3328; GCN-NEXT: s_mov_b32 s5, s7 3329; GCN-NEXT: s_mov_b32 s6, s8 3330; GCN-NEXT: s_mov_b32 s7, s9 3331; GCN-NEXT: s_mov_b32 s8, s10 3332; GCN-NEXT: s_mov_b32 s9, s11 3333; GCN-NEXT: s_mov_b32 s10, s12 3334; GCN-NEXT: s_mov_b32 s11, s13 3335; GCN-NEXT: s_mov_b32 s12, s14 3336; GCN-NEXT: s_mov_b32 s13, s15 3337; GCN-NEXT: s_mov_b32 s14, s16 3338; GCN-NEXT: s_movrels_b32 s0, s0 3339; GCN-NEXT: v_mov_b32_e32 v0, s0 3340; GCN-NEXT: ; return to shader part epilog 3341; 3342; GFX10-LABEL: dyn_extract_v15f32_s_s: 3343; GFX10: ; %bb.0: ; %entry 3344; GFX10-NEXT: s_mov_b32 s0, s2 3345; GFX10-NEXT: s_mov_b32 m0, s17 3346; GFX10-NEXT: s_mov_b32 s1, s3 3347; GFX10-NEXT: s_mov_b32 s2, s4 3348; GFX10-NEXT: s_mov_b32 s3, s5 3349; GFX10-NEXT: s_mov_b32 s4, s6 3350; GFX10-NEXT: s_mov_b32 s5, s7 3351; GFX10-NEXT: s_mov_b32 s6, s8 3352; GFX10-NEXT: s_mov_b32 s7, s9 3353; GFX10-NEXT: 
s_mov_b32 s8, s10 3354; GFX10-NEXT: s_mov_b32 s9, s11 3355; GFX10-NEXT: s_mov_b32 s10, s12 3356; GFX10-NEXT: s_mov_b32 s11, s13 3357; GFX10-NEXT: s_mov_b32 s12, s14 3358; GFX10-NEXT: s_mov_b32 s13, s15 3359; GFX10-NEXT: s_mov_b32 s14, s16 3360; GFX10-NEXT: s_movrels_b32 s0, s0 3361; GFX10-NEXT: v_mov_b32_e32 v0, s0 3362; GFX10-NEXT: ; return to shader part epilog 3363entry: 3364 %ext = extractelement <15 x float> %vec, i32 %sel 3365 ret float %ext 3366} 3367 3368define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) { 3369; GCN-LABEL: dyn_extract_v15f32_s_s_offset3: 3370; GCN: ; %bb.0: ; %entry 3371; GCN-NEXT: s_mov_b32 s0, s2 3372; GCN-NEXT: s_mov_b32 s1, s3 3373; GCN-NEXT: s_mov_b32 s3, s5 3374; GCN-NEXT: s_mov_b32 m0, s17 3375; GCN-NEXT: s_mov_b32 s2, s4 3376; GCN-NEXT: s_mov_b32 s4, s6 3377; GCN-NEXT: s_mov_b32 s5, s7 3378; GCN-NEXT: s_mov_b32 s6, s8 3379; GCN-NEXT: s_mov_b32 s7, s9 3380; GCN-NEXT: s_mov_b32 s8, s10 3381; GCN-NEXT: s_mov_b32 s9, s11 3382; GCN-NEXT: s_mov_b32 s10, s12 3383; GCN-NEXT: s_mov_b32 s11, s13 3384; GCN-NEXT: s_mov_b32 s12, s14 3385; GCN-NEXT: s_mov_b32 s13, s15 3386; GCN-NEXT: s_mov_b32 s14, s16 3387; GCN-NEXT: s_movrels_b32 s0, s3 3388; GCN-NEXT: v_mov_b32_e32 v0, s0 3389; GCN-NEXT: ; return to shader part epilog 3390; 3391; GFX10-LABEL: dyn_extract_v15f32_s_s_offset3: 3392; GFX10: ; %bb.0: ; %entry 3393; GFX10-NEXT: s_mov_b32 s1, s3 3394; GFX10-NEXT: s_mov_b32 s3, s5 3395; GFX10-NEXT: s_mov_b32 m0, s17 3396; GFX10-NEXT: s_mov_b32 s0, s2 3397; GFX10-NEXT: s_mov_b32 s2, s4 3398; GFX10-NEXT: s_mov_b32 s4, s6 3399; GFX10-NEXT: s_mov_b32 s5, s7 3400; GFX10-NEXT: s_mov_b32 s6, s8 3401; GFX10-NEXT: s_mov_b32 s7, s9 3402; GFX10-NEXT: s_mov_b32 s8, s10 3403; GFX10-NEXT: s_mov_b32 s9, s11 3404; GFX10-NEXT: s_mov_b32 s10, s12 3405; GFX10-NEXT: s_mov_b32 s11, s13 3406; GFX10-NEXT: s_mov_b32 s12, s14 3407; GFX10-NEXT: s_mov_b32 s13, s15 3408; GFX10-NEXT: s_mov_b32 s14, s16 3409; GFX10-NEXT: s_movrels_b32 
s0, s3 3410; GFX10-NEXT: v_mov_b32_e32 v0, s0 3411; GFX10-NEXT: ; return to shader part epilog 3412entry: 3413 %add = add i32 %sel, 3 3414 %ext = extractelement <15 x float> %vec, i32 %add 3415 ret float %ext 3416} 3417 3418define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) { 3419; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3: 3420; GPRIDX: ; %bb.0: ; %entry 3421; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3422; GPRIDX-NEXT: v_add_u32_e32 v15, 3, v15 3423; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3424; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3425; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3426; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3427; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 3428; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3429; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 3430; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3431; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3432; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3433; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 3434; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3435; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3436; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3437; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3438; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3439; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3440; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3441; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3442; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3443; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3444; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3445; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3446; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3447; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3448; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3449; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3450; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 3451; GPRIDX-NEXT: s_setpc_b64 s[30:31] 3452; 3453; MOVREL-LABEL: 
dyn_extract_v15f32_v_v_offset3: 3454; MOVREL: ; %bb.0: ; %entry 3455; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3456; MOVREL-NEXT: v_add_u32_e32 v15, vcc, 3, v15 3457; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3458; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3459; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3460; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3461; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 3462; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3463; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 3464; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3465; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3466; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3467; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 3468; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3469; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3470; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3471; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3472; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3473; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3474; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3475; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3476; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3477; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3478; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3479; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3480; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3481; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3482; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3483; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3484; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 3485; MOVREL-NEXT: s_setpc_b64 s[30:31] 3486; 3487; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3: 3488; GFX10: ; %bb.0: ; %entry 3489; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3490; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3491; GFX10-NEXT: v_add_nc_u32_e32 v15, 3, v15 3492; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 3493; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3494; GFX10-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 2, v15 3495; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3496; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 3497; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 3498; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 3499; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 3500; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 3501; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 3502; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 3503; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 3504; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 3505; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3506; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 3507; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 3508; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 3509; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo 3510; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 3511; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 3512; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 3513; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo 3514; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 3515; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 3516; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 3517; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo 3518; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 3519; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 3520; GFX10-NEXT: s_setpc_b64 s[30:31] 3521entry: 3522 %add = add i32 %sel, 3 3523 %ext = extractelement <15 x float> %vec, i32 %add 3524 ret float %ext 3525} 3526 3527define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32 %sel) { 3528; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s: 3529; GPRIDX: .amd_kernel_code_t 3530; GPRIDX-NEXT: amd_code_version_major = 1 3531; GPRIDX-NEXT: amd_code_version_minor = 2 3532; GPRIDX-NEXT: amd_machine_kind = 1 3533; GPRIDX-NEXT: amd_machine_version_major = 9 3534; GPRIDX-NEXT: amd_machine_version_minor = 0 3535; GPRIDX-NEXT: amd_machine_version_stepping = 0 3536; GPRIDX-NEXT: 
kernel_code_entry_byte_offset = 256 3537; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 3538; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 3539; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 0 3540; GPRIDX-NEXT: priority = 0 3541; GPRIDX-NEXT: float_mode = 240 3542; GPRIDX-NEXT: priv = 0 3543; GPRIDX-NEXT: enable_dx10_clamp = 1 3544; GPRIDX-NEXT: debug_mode = 0 3545; GPRIDX-NEXT: enable_ieee_mode = 1 3546; GPRIDX-NEXT: enable_wgp_mode = 0 3547; GPRIDX-NEXT: enable_mem_ordered = 0 3548; GPRIDX-NEXT: enable_fwd_progress = 0 3549; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3550; GPRIDX-NEXT: user_sgpr_count = 6 3551; GPRIDX-NEXT: enable_trap_handler = 0 3552; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 3553; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 3554; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 3555; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 3556; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 3557; GPRIDX-NEXT: enable_exception_msb = 0 3558; GPRIDX-NEXT: granulated_lds_size = 0 3559; GPRIDX-NEXT: enable_exception = 0 3560; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 3561; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 3562; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 3563; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3564; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 3565; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 3566; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 3567; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3568; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3569; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3570; GPRIDX-NEXT: enable_wavefront_size32 = 0 3571; GPRIDX-NEXT: enable_ordered_append_gds = 0 3572; GPRIDX-NEXT: private_element_size = 1 3573; GPRIDX-NEXT: is_ptr64 = 1 3574; GPRIDX-NEXT: is_dynamic_callstack = 0 3575; GPRIDX-NEXT: is_debug_enabled = 0 3576; GPRIDX-NEXT: is_xnack_enabled = 1 3577; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 3578; GPRIDX-NEXT: 
workgroup_group_segment_byte_size = 0 3579; GPRIDX-NEXT: gds_segment_byte_size = 0 3580; GPRIDX-NEXT: kernarg_segment_byte_size = 12 3581; GPRIDX-NEXT: workgroup_fbarrier_count = 0 3582; GPRIDX-NEXT: wavefront_sgpr_count = 6 3583; GPRIDX-NEXT: workitem_vgpr_count = 2 3584; GPRIDX-NEXT: reserved_vgpr_first = 0 3585; GPRIDX-NEXT: reserved_vgpr_count = 0 3586; GPRIDX-NEXT: reserved_sgpr_first = 0 3587; GPRIDX-NEXT: reserved_sgpr_count = 0 3588; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3589; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 3590; GPRIDX-NEXT: kernarg_segment_alignment = 4 3591; GPRIDX-NEXT: group_segment_alignment = 4 3592; GPRIDX-NEXT: private_segment_alignment = 4 3593; GPRIDX-NEXT: wavefront_size = 6 3594; GPRIDX-NEXT: call_convention = -1 3595; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 3596; GPRIDX-NEXT: .end_amd_kernel_code_t 3597; GPRIDX-NEXT: ; %bb.0: ; %entry 3598; GPRIDX-NEXT: s_load_dword s2, s[4:5], 0x8 3599; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3600; GPRIDX-NEXT: v_mov_b32_e32 v1, 0 3601; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 3602; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1 3603; GPRIDX-NEXT: s_cselect_b32 s3, 2.0, 1.0 3604; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2 3605; GPRIDX-NEXT: s_cselect_b32 s3, 0x40400000, s3 3606; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3 3607; GPRIDX-NEXT: s_cselect_b32 s2, 4.0, s3 3608; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 3609; GPRIDX-NEXT: global_store_dword v1, v0, s[0:1] 3610; GPRIDX-NEXT: s_endpgm 3611; 3612; MOVREL-LABEL: dyn_extract_v4f32_s_s_s: 3613; MOVREL: .amd_kernel_code_t 3614; MOVREL-NEXT: amd_code_version_major = 1 3615; MOVREL-NEXT: amd_code_version_minor = 2 3616; MOVREL-NEXT: amd_machine_kind = 1 3617; MOVREL-NEXT: amd_machine_version_major = 8 3618; MOVREL-NEXT: amd_machine_version_minor = 0 3619; MOVREL-NEXT: amd_machine_version_stepping = 3 3620; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 3621; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 3622; MOVREL-NEXT: 
granulated_workitem_vgpr_count = 0 3623; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0 3624; MOVREL-NEXT: priority = 0 3625; MOVREL-NEXT: float_mode = 240 3626; MOVREL-NEXT: priv = 0 3627; MOVREL-NEXT: enable_dx10_clamp = 1 3628; MOVREL-NEXT: debug_mode = 0 3629; MOVREL-NEXT: enable_ieee_mode = 1 3630; MOVREL-NEXT: enable_wgp_mode = 0 3631; MOVREL-NEXT: enable_mem_ordered = 0 3632; MOVREL-NEXT: enable_fwd_progress = 0 3633; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3634; MOVREL-NEXT: user_sgpr_count = 6 3635; MOVREL-NEXT: enable_trap_handler = 0 3636; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 3637; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 3638; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 3639; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 3640; MOVREL-NEXT: enable_vgpr_workitem_id = 0 3641; MOVREL-NEXT: enable_exception_msb = 0 3642; MOVREL-NEXT: granulated_lds_size = 0 3643; MOVREL-NEXT: enable_exception = 0 3644; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 3645; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 3646; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 3647; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3648; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 3649; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 3650; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 3651; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3652; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3653; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3654; MOVREL-NEXT: enable_wavefront_size32 = 0 3655; MOVREL-NEXT: enable_ordered_append_gds = 0 3656; MOVREL-NEXT: private_element_size = 1 3657; MOVREL-NEXT: is_ptr64 = 1 3658; MOVREL-NEXT: is_dynamic_callstack = 0 3659; MOVREL-NEXT: is_debug_enabled = 0 3660; MOVREL-NEXT: is_xnack_enabled = 0 3661; MOVREL-NEXT: workitem_private_segment_byte_size = 0 3662; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 3663; MOVREL-NEXT: gds_segment_byte_size = 0 3664; MOVREL-NEXT: kernarg_segment_byte_size = 12 3665; 
MOVREL-NEXT: workgroup_fbarrier_count = 0 3666; MOVREL-NEXT: wavefront_sgpr_count = 6 3667; MOVREL-NEXT: workitem_vgpr_count = 3 3668; MOVREL-NEXT: reserved_vgpr_first = 0 3669; MOVREL-NEXT: reserved_vgpr_count = 0 3670; MOVREL-NEXT: reserved_sgpr_first = 0 3671; MOVREL-NEXT: reserved_sgpr_count = 0 3672; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3673; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 3674; MOVREL-NEXT: kernarg_segment_alignment = 4 3675; MOVREL-NEXT: group_segment_alignment = 4 3676; MOVREL-NEXT: private_segment_alignment = 4 3677; MOVREL-NEXT: wavefront_size = 6 3678; MOVREL-NEXT: call_convention = -1 3679; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 3680; MOVREL-NEXT: .end_amd_kernel_code_t 3681; MOVREL-NEXT: ; %bb.0: ; %entry 3682; MOVREL-NEXT: s_load_dword s2, s[4:5], 0x8 3683; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3684; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 3685; MOVREL-NEXT: s_cmp_eq_u32 s2, 1 3686; MOVREL-NEXT: s_cselect_b32 s3, 2.0, 1.0 3687; MOVREL-NEXT: s_cmp_eq_u32 s2, 2 3688; MOVREL-NEXT: s_cselect_b32 s3, 0x40400000, s3 3689; MOVREL-NEXT: s_cmp_eq_u32 s2, 3 3690; MOVREL-NEXT: s_cselect_b32 s2, 4.0, s3 3691; MOVREL-NEXT: v_mov_b32_e32 v0, s0 3692; MOVREL-NEXT: v_mov_b32_e32 v2, s2 3693; MOVREL-NEXT: v_mov_b32_e32 v1, s1 3694; MOVREL-NEXT: flat_store_dword v[0:1], v2 3695; MOVREL-NEXT: s_endpgm 3696; 3697; GFX10-LABEL: dyn_extract_v4f32_s_s_s: 3698; GFX10: .amd_kernel_code_t 3699; GFX10-NEXT: amd_code_version_major = 1 3700; GFX10-NEXT: amd_code_version_minor = 2 3701; GFX10-NEXT: amd_machine_kind = 1 3702; GFX10-NEXT: amd_machine_version_major = 10 3703; GFX10-NEXT: amd_machine_version_minor = 1 3704; GFX10-NEXT: amd_machine_version_stepping = 0 3705; GFX10-NEXT: kernel_code_entry_byte_offset = 256 3706; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 3707; GFX10-NEXT: granulated_workitem_vgpr_count = 0 3708; GFX10-NEXT: granulated_wavefront_sgpr_count = 0 3709; GFX10-NEXT: priority = 0 3710; GFX10-NEXT: 
float_mode = 240 3711; GFX10-NEXT: priv = 0 3712; GFX10-NEXT: enable_dx10_clamp = 1 3713; GFX10-NEXT: debug_mode = 0 3714; GFX10-NEXT: enable_ieee_mode = 1 3715; GFX10-NEXT: enable_wgp_mode = 1 3716; GFX10-NEXT: enable_mem_ordered = 1 3717; GFX10-NEXT: enable_fwd_progress = 0 3718; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3719; GFX10-NEXT: user_sgpr_count = 6 3720; GFX10-NEXT: enable_trap_handler = 0 3721; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 3722; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0 3723; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0 3724; GFX10-NEXT: enable_sgpr_workgroup_info = 0 3725; GFX10-NEXT: enable_vgpr_workitem_id = 0 3726; GFX10-NEXT: enable_exception_msb = 0 3727; GFX10-NEXT: granulated_lds_size = 0 3728; GFX10-NEXT: enable_exception = 0 3729; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 3730; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0 3731; GFX10-NEXT: enable_sgpr_queue_ptr = 0 3732; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3733; GFX10-NEXT: enable_sgpr_dispatch_id = 0 3734; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 3735; GFX10-NEXT: enable_sgpr_private_segment_size = 0 3736; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3737; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3738; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3739; GFX10-NEXT: enable_wavefront_size32 = 1 3740; GFX10-NEXT: enable_ordered_append_gds = 0 3741; GFX10-NEXT: private_element_size = 1 3742; GFX10-NEXT: is_ptr64 = 1 3743; GFX10-NEXT: is_dynamic_callstack = 0 3744; GFX10-NEXT: is_debug_enabled = 0 3745; GFX10-NEXT: is_xnack_enabled = 1 3746; GFX10-NEXT: workitem_private_segment_byte_size = 0 3747; GFX10-NEXT: workgroup_group_segment_byte_size = 0 3748; GFX10-NEXT: gds_segment_byte_size = 0 3749; GFX10-NEXT: kernarg_segment_byte_size = 12 3750; GFX10-NEXT: workgroup_fbarrier_count = 0 3751; GFX10-NEXT: wavefront_sgpr_count = 6 3752; GFX10-NEXT: workitem_vgpr_count = 2 3753; GFX10-NEXT: reserved_vgpr_first = 0 3754; 
GFX10-NEXT: reserved_vgpr_count = 0 3755; GFX10-NEXT: reserved_sgpr_first = 0 3756; GFX10-NEXT: reserved_sgpr_count = 0 3757; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3758; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 3759; GFX10-NEXT: kernarg_segment_alignment = 4 3760; GFX10-NEXT: group_segment_alignment = 4 3761; GFX10-NEXT: private_segment_alignment = 4 3762; GFX10-NEXT: wavefront_size = 5 3763; GFX10-NEXT: call_convention = -1 3764; GFX10-NEXT: runtime_loader_kernel_symbol = 0 3765; GFX10-NEXT: .end_amd_kernel_code_t 3766; GFX10-NEXT: ; %bb.0: ; %entry 3767; GFX10-NEXT: s_clause 0x1 3768; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 3769; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3770; GFX10-NEXT: v_mov_b32_e32 v1, 0 3771; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3772; GFX10-NEXT: s_cmp_eq_u32 s2, 1 3773; GFX10-NEXT: s_cselect_b32 s3, 2.0, 1.0 3774; GFX10-NEXT: s_cmp_eq_u32 s2, 2 3775; GFX10-NEXT: s_cselect_b32 s3, 0x40400000, s3 3776; GFX10-NEXT: s_cmp_eq_u32 s2, 3 3777; GFX10-NEXT: s_cselect_b32 s2, 4.0, s3 3778; GFX10-NEXT: v_mov_b32_e32 v0, s2 3779; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 3780; GFX10-NEXT: s_endpgm 3781entry: 3782 %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel 3783 store float %ext, float addrspace(1)* %out 3784 ret void 3785} 3786 3787define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i32 %sel) { 3788; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s: 3789; GPRIDX: .amd_kernel_code_t 3790; GPRIDX-NEXT: amd_code_version_major = 1 3791; GPRIDX-NEXT: amd_code_version_minor = 2 3792; GPRIDX-NEXT: amd_machine_kind = 1 3793; GPRIDX-NEXT: amd_machine_version_major = 9 3794; GPRIDX-NEXT: amd_machine_version_minor = 0 3795; GPRIDX-NEXT: amd_machine_version_stepping = 0 3796; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 3797; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 3798; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 3799; GPRIDX-NEXT: 
granulated_wavefront_sgpr_count = 0 3800; GPRIDX-NEXT: priority = 0 3801; GPRIDX-NEXT: float_mode = 240 3802; GPRIDX-NEXT: priv = 0 3803; GPRIDX-NEXT: enable_dx10_clamp = 1 3804; GPRIDX-NEXT: debug_mode = 0 3805; GPRIDX-NEXT: enable_ieee_mode = 1 3806; GPRIDX-NEXT: enable_wgp_mode = 0 3807; GPRIDX-NEXT: enable_mem_ordered = 0 3808; GPRIDX-NEXT: enable_fwd_progress = 0 3809; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3810; GPRIDX-NEXT: user_sgpr_count = 6 3811; GPRIDX-NEXT: enable_trap_handler = 0 3812; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 3813; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 3814; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 3815; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 3816; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 3817; GPRIDX-NEXT: enable_exception_msb = 0 3818; GPRIDX-NEXT: granulated_lds_size = 0 3819; GPRIDX-NEXT: enable_exception = 0 3820; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 3821; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 3822; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 3823; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3824; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 3825; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 3826; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 3827; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3828; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3829; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3830; GPRIDX-NEXT: enable_wavefront_size32 = 0 3831; GPRIDX-NEXT: enable_ordered_append_gds = 0 3832; GPRIDX-NEXT: private_element_size = 1 3833; GPRIDX-NEXT: is_ptr64 = 1 3834; GPRIDX-NEXT: is_dynamic_callstack = 0 3835; GPRIDX-NEXT: is_debug_enabled = 0 3836; GPRIDX-NEXT: is_xnack_enabled = 1 3837; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 3838; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 3839; GPRIDX-NEXT: gds_segment_byte_size = 0 3840; GPRIDX-NEXT: kernarg_segment_byte_size = 12 3841; GPRIDX-NEXT: workgroup_fbarrier_count = 0 3842; 
GPRIDX-NEXT: wavefront_sgpr_count = 7 3843; GPRIDX-NEXT: workitem_vgpr_count = 3 3844; GPRIDX-NEXT: reserved_vgpr_first = 0 3845; GPRIDX-NEXT: reserved_vgpr_count = 0 3846; GPRIDX-NEXT: reserved_sgpr_first = 0 3847; GPRIDX-NEXT: reserved_sgpr_count = 0 3848; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3849; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 3850; GPRIDX-NEXT: kernarg_segment_alignment = 4 3851; GPRIDX-NEXT: group_segment_alignment = 4 3852; GPRIDX-NEXT: private_segment_alignment = 4 3853; GPRIDX-NEXT: wavefront_size = 6 3854; GPRIDX-NEXT: call_convention = -1 3855; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 3856; GPRIDX-NEXT: .end_amd_kernel_code_t 3857; GPRIDX-NEXT: ; %bb.0: ; %entry 3858; GPRIDX-NEXT: s_load_dword s6, s[4:5], 0x8 3859; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3860; GPRIDX-NEXT: s_mov_b32 s2, 0 3861; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000 3862; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 3863; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 3864; GPRIDX-NEXT: s_cmp_eq_u32 s6, 1 3865; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 3866; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2 3867; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3868; GPRIDX-NEXT: s_cmp_eq_u32 s6, 3 3869; GPRIDX-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 3870; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 3871; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 3872; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 3873; GPRIDX-NEXT: s_endpgm 3874; 3875; MOVREL-LABEL: dyn_extract_v4f64_s_s_s: 3876; MOVREL: .amd_kernel_code_t 3877; MOVREL-NEXT: amd_code_version_major = 1 3878; MOVREL-NEXT: amd_code_version_minor = 2 3879; MOVREL-NEXT: amd_machine_kind = 1 3880; MOVREL-NEXT: amd_machine_version_major = 8 3881; MOVREL-NEXT: amd_machine_version_minor = 0 3882; MOVREL-NEXT: amd_machine_version_stepping = 3 3883; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 3884; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 3885; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 3886; MOVREL-NEXT: 
granulated_wavefront_sgpr_count = 0 3887; MOVREL-NEXT: priority = 0 3888; MOVREL-NEXT: float_mode = 240 3889; MOVREL-NEXT: priv = 0 3890; MOVREL-NEXT: enable_dx10_clamp = 1 3891; MOVREL-NEXT: debug_mode = 0 3892; MOVREL-NEXT: enable_ieee_mode = 1 3893; MOVREL-NEXT: enable_wgp_mode = 0 3894; MOVREL-NEXT: enable_mem_ordered = 0 3895; MOVREL-NEXT: enable_fwd_progress = 0 3896; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3897; MOVREL-NEXT: user_sgpr_count = 6 3898; MOVREL-NEXT: enable_trap_handler = 0 3899; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 3900; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 3901; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 3902; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 3903; MOVREL-NEXT: enable_vgpr_workitem_id = 0 3904; MOVREL-NEXT: enable_exception_msb = 0 3905; MOVREL-NEXT: granulated_lds_size = 0 3906; MOVREL-NEXT: enable_exception = 0 3907; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 3908; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 3909; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 3910; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3911; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 3912; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 3913; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 3914; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3915; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3916; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3917; MOVREL-NEXT: enable_wavefront_size32 = 0 3918; MOVREL-NEXT: enable_ordered_append_gds = 0 3919; MOVREL-NEXT: private_element_size = 1 3920; MOVREL-NEXT: is_ptr64 = 1 3921; MOVREL-NEXT: is_dynamic_callstack = 0 3922; MOVREL-NEXT: is_debug_enabled = 0 3923; MOVREL-NEXT: is_xnack_enabled = 0 3924; MOVREL-NEXT: workitem_private_segment_byte_size = 0 3925; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 3926; MOVREL-NEXT: gds_segment_byte_size = 0 3927; MOVREL-NEXT: kernarg_segment_byte_size = 12 3928; MOVREL-NEXT: workgroup_fbarrier_count = 0 3929; 
MOVREL-NEXT: wavefront_sgpr_count = 7 3930; MOVREL-NEXT: workitem_vgpr_count = 4 3931; MOVREL-NEXT: reserved_vgpr_first = 0 3932; MOVREL-NEXT: reserved_vgpr_count = 0 3933; MOVREL-NEXT: reserved_sgpr_first = 0 3934; MOVREL-NEXT: reserved_sgpr_count = 0 3935; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3936; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 3937; MOVREL-NEXT: kernarg_segment_alignment = 4 3938; MOVREL-NEXT: group_segment_alignment = 4 3939; MOVREL-NEXT: private_segment_alignment = 4 3940; MOVREL-NEXT: wavefront_size = 6 3941; MOVREL-NEXT: call_convention = -1 3942; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 3943; MOVREL-NEXT: .end_amd_kernel_code_t 3944; MOVREL-NEXT: ; %bb.0: ; %entry 3945; MOVREL-NEXT: s_load_dword s6, s[4:5], 0x8 3946; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3947; MOVREL-NEXT: s_mov_b32 s2, 0 3948; MOVREL-NEXT: s_mov_b32 s3, 0x40080000 3949; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 3950; MOVREL-NEXT: s_cmp_eq_u32 s6, 1 3951; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 3952; MOVREL-NEXT: s_cmp_eq_u32 s6, 2 3953; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3954; MOVREL-NEXT: s_cmp_eq_u32 s6, 3 3955; MOVREL-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 3956; MOVREL-NEXT: v_mov_b32_e32 v0, s2 3957; MOVREL-NEXT: v_mov_b32_e32 v3, s1 3958; MOVREL-NEXT: v_mov_b32_e32 v1, s3 3959; MOVREL-NEXT: v_mov_b32_e32 v2, s0 3960; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 3961; MOVREL-NEXT: s_endpgm 3962; 3963; GFX10-LABEL: dyn_extract_v4f64_s_s_s: 3964; GFX10: .amd_kernel_code_t 3965; GFX10-NEXT: amd_code_version_major = 1 3966; GFX10-NEXT: amd_code_version_minor = 2 3967; GFX10-NEXT: amd_machine_kind = 1 3968; GFX10-NEXT: amd_machine_version_major = 10 3969; GFX10-NEXT: amd_machine_version_minor = 1 3970; GFX10-NEXT: amd_machine_version_stepping = 0 3971; GFX10-NEXT: kernel_code_entry_byte_offset = 256 3972; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 3973; GFX10-NEXT: granulated_workitem_vgpr_count = 0 3974; 
GFX10-NEXT: granulated_wavefront_sgpr_count = 0 3975; GFX10-NEXT: priority = 0 3976; GFX10-NEXT: float_mode = 240 3977; GFX10-NEXT: priv = 0 3978; GFX10-NEXT: enable_dx10_clamp = 1 3979; GFX10-NEXT: debug_mode = 0 3980; GFX10-NEXT: enable_ieee_mode = 1 3981; GFX10-NEXT: enable_wgp_mode = 1 3982; GFX10-NEXT: enable_mem_ordered = 1 3983; GFX10-NEXT: enable_fwd_progress = 0 3984; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3985; GFX10-NEXT: user_sgpr_count = 6 3986; GFX10-NEXT: enable_trap_handler = 0 3987; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 3988; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0 3989; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0 3990; GFX10-NEXT: enable_sgpr_workgroup_info = 0 3991; GFX10-NEXT: enable_vgpr_workitem_id = 0 3992; GFX10-NEXT: enable_exception_msb = 0 3993; GFX10-NEXT: granulated_lds_size = 0 3994; GFX10-NEXT: enable_exception = 0 3995; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 3996; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0 3997; GFX10-NEXT: enable_sgpr_queue_ptr = 0 3998; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3999; GFX10-NEXT: enable_sgpr_dispatch_id = 0 4000; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 4001; GFX10-NEXT: enable_sgpr_private_segment_size = 0 4002; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4003; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4004; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4005; GFX10-NEXT: enable_wavefront_size32 = 1 4006; GFX10-NEXT: enable_ordered_append_gds = 0 4007; GFX10-NEXT: private_element_size = 1 4008; GFX10-NEXT: is_ptr64 = 1 4009; GFX10-NEXT: is_dynamic_callstack = 0 4010; GFX10-NEXT: is_debug_enabled = 0 4011; GFX10-NEXT: is_xnack_enabled = 1 4012; GFX10-NEXT: workitem_private_segment_byte_size = 0 4013; GFX10-NEXT: workgroup_group_segment_byte_size = 0 4014; GFX10-NEXT: gds_segment_byte_size = 0 4015; GFX10-NEXT: kernarg_segment_byte_size = 12 4016; GFX10-NEXT: workgroup_fbarrier_count = 0 4017; GFX10-NEXT: wavefront_sgpr_count = 7 
; GFX10-NEXT: workitem_vgpr_count = 3
; GFX10-NEXT: reserved_vgpr_first = 0
; GFX10-NEXT: reserved_vgpr_count = 0
; GFX10-NEXT: reserved_sgpr_first = 0
; GFX10-NEXT: reserved_sgpr_count = 0
; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0
; GFX10-NEXT: kernarg_segment_alignment = 4
; GFX10-NEXT: group_segment_alignment = 4
; GFX10-NEXT: private_segment_alignment = 4
; GFX10-NEXT: wavefront_size = 5
; GFX10-NEXT: call_convention = -1
; GFX10-NEXT: runtime_loader_kernel_symbol = 0
; GFX10-NEXT: .end_amd_kernel_code_t
; GFX10-NEXT: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dword s6, s[4:5], 0x8
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: s_mov_b32 s2, 0
; GFX10-NEXT: s_mov_b32 s3, 0x40080000
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s6, 1
; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
; GFX10-NEXT: s_cmp_eq_u32 s6, 2
; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GFX10-NEXT: s_cmp_eq_u32 s6, 3
; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT: s_endpgm
entry:
  %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; Extract constant element 7 from a <64 x i32> loaded through an addrspace(1)
; pointer. The expected code loads a single dwordx4 at byte offset 16
; (elements 4..7) and returns its last register, v7. On gfx900 and gfx1010 the
; offset is an immediate on global_load; on fiji it is added to the address
; with v_add_u32/v_addc_u32 before a flat_load.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_7:
; GPRIDX: ; %bb.0:
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GPRIDX-NEXT: s_waitcnt vmcnt(0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v7
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_7:
; MOVREL: ; %bb.0:
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
; MOVREL-NEXT: s_waitcnt vmcnt(0)
; MOVREL-NEXT: v_mov_b32_e32 v0, v7
; MOVREL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %elt = extractelement <64 x i32> %vec, i32 7
  ret i32 %elt
}

; Extract constant element 32 from a <64 x i32> loaded through an addrspace(1)
; pointer. The expected code loads one dwordx4 at byte offset 128 (0x80)
; directly into v[0:3], so the result is already in v0 and no copy follows the
; load. On fiji the 0x80 offset is first materialized in s[4:5], copied to
; v2/v3, and added to the address with a 64-bit add before the flat_load.
define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_32:
; GPRIDX: ; %bb.0:
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GPRIDX-NEXT: s_waitcnt vmcnt(0)
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_32:
; MOVREL: ; %bb.0:
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], 0x80
; MOVREL-NEXT: v_mov_b32_e32 v2, s4
; MOVREL-NEXT: v_mov_b32_e32 v3, s5
; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; MOVREL-NEXT: s_waitcnt vmcnt(0)
; MOVREL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %elt = extractelement <64 x i32> %vec, i32 32
  ret i32 %elt
}

; Extract constant element 33 from a <64 x i32> loaded through an addrspace(1)
; pointer. The expected code uses the same dwordx4 load at byte offset 128 as
; the index-32 case (into v[0:3]) and returns the second loaded register, v1,
; via a move into v0.
define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_33:
; GPRIDX: ; %bb.0:
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GPRIDX-NEXT: s_waitcnt vmcnt(0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_33:
; MOVREL: ; %bb.0:
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], 0x80
; MOVREL-NEXT: v_mov_b32_e32 v2, s4
; MOVREL-NEXT: v_mov_b32_e32 v3, s5
; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; MOVREL-NEXT: s_waitcnt vmcnt(0)
; MOVREL-NEXT: v_mov_b32_e32 v0, v1
; MOVREL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %elt = extractelement <64 x i32> %vec, i32 33
  ret i32 %elt
}

; Extract constant element 37 from a <64 x i32> loaded through an addrspace(1)
; pointer. The expected code loads one dwordx4 at byte offset 144 (0x90,
; elements 36..39) into v[4:7] and returns the second loaded register, v5.
; On fiji the 0x90 offset is added to the address with v_add_u32/v_addc_u32
; before a flat_load.
define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_37:
; GPRIDX: ; %bb.0:
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:144
; GPRIDX-NEXT: s_waitcnt vmcnt(0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v5
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_37:
; MOVREL: ; %bb.0:
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x90, v0
; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
; MOVREL-NEXT: s_waitcnt vmcnt(0)
; MOVREL-NEXT: v_mov_b32_e32 v0, v5
; MOVREL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_37:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:144
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %elt = extractelement <64 x i32> %vec, i32 37
  ret i32 %elt
}