; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Reviewer documentation (hand-written, not produced by the update script).
; Each test documented below performs a single `extractelement` with a
; runtime index %sel on an 8-element vector. The four llc invocations above
; are checked under the following FileCheck prefixes:
;   gfx900  -> GCN and GPRIDX
;   fiji    -> GCN and MOVREL
;   gfx1010 -> GFX10PLUS and GFX10
;   gfx1100 -> GFX10PLUS and GFX11
; The assertion lines are autogenerated; regenerate them with the script
; named on the first line instead of editing them by hand.

; Constant <8 x float> vector (1.0 .. 8.0), index passed as a plain i32
; argument (v0 in the expected code). All targets are expected to lower this
; to a chain of v_cmp_eq_u32 / v_cndmask_b32 selects over indices 1..7.
define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_const_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

; Constant <8 x float> vector, index passed `inreg` (s2 in the expected
; code) to an amdgpu_ps function. gfx900 expects an s_cmp_eq_u32 /
; s_cselect_b32 chain; fiji and gfx10+ expect the constants materialized in
; s4..s11 followed by one s_movrels_b32 indexed through m0.
define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1
; GPRIDX-NEXT: s_cselect_b32 s0, 2.0, 1.0
; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2
; GPRIDX-NEXT: s_cselect_b32 s0, 0x40400000, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3
; GPRIDX-NEXT: s_cselect_b32 s0, 4.0, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s2, 4
; GPRIDX-NEXT: s_cselect_b32 s0, 0x40a00000, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s2, 5
; GPRIDX-NEXT: s_cselect_b32 s0, 0x40c00000, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s2, 6
; GPRIDX-NEXT: s_cselect_b32 s0, 0x40e00000, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s2, 7
; GPRIDX-NEXT: s_cselect_b32 s0, 0x41000000, s0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s4, 1.0
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s5, 2.0
; MOVREL-NEXT: s_movrels_b32 s0, s4
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0
; GFX10PLUS-NEXT: s_mov_b32 m0, s2
; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000
; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000
; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000
; GFX10PLUS-NEXT: s_mov_b32 s8, 0x40a00000
; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0
; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000
; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0
; GFX10PLUS-NEXT: s_movrels_b32 s0, s4
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

; <8 x float> vector passed `inreg` (s2..s9 in the expected code), index
; passed as a plain i32 argument (v0). All targets expect a v_cmp_eq_u32 /
; v_cndmask_b32 select chain over indices 1..7.
define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s3
; GCN-NEXT: s_mov_b32 s2, s4
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: s_mov_b32 s3, s5
; GCN-NEXT: v_mov_b32_e32 v3, s2
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: v_mov_b32_e32 v4, s3
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v5, s6
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v6, s7
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: v_mov_b32_e32 v7, s8
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT: v_mov_b32_e32 v8, s9
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v8, vcc
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_s_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s1
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s7, vcc_lo
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; <8 x float> vector and index both passed as plain arguments (v0..v7 and
; v8 in the expected code). All targets expect a v_cmp_eq_u32 /
; v_cndmask_b32 select chain that accumulates into v0.
define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_v_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; <8 x float> vector passed as a plain argument (v0..v7), index passed
; `inreg` (s2). gfx900 expects a v_cmp_eq_u32 / v_cndmask_b32 select chain;
; fiji and gfx10+ expect m0 to be loaded from s2 followed by a single
; v_movrels_b32.
define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
; MOVREL-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_v_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_mov_b32 m0, s2
; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; <8 x float> vector and index both passed `inreg` (s2..s9 and s10 in the
; expected code). gfx900 expects an s_cmp_eq_u32 / s_cselect_b32 chain;
; fiji and gfx10+ expect the vector copied down to s0..s7 followed by one
; s_movrels_b32 indexed through m0.
define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1
; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2
; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3
; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4
; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5
; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6
; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7
; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movrels_b32 s0, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 m0, s10
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: s_movrels_b32 s0, s0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; Constant <8 x i64> vector (1 .. 8), index passed as a plain i32 argument
; (v0). All targets expect a select chain in which every index compare
; feeds two v_cndmask_b32 instructions, one per 32-bit half of the result.
define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; GCN-LABEL: dyn_extract_v8i64_const_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], 1
; GCN-NEXT: s_mov_b64 s[6:7], 2
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: v_mov_b32_e32 v2, s5
; GCN-NEXT: v_mov_b32_e32 v3, s6
; GCN-NEXT: v_mov_b32_e32 v4, s7
; GCN-NEXT: s_mov_b64 s[8:9], 3
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_mov_b32_e32 v5, s8
; GCN-NEXT: v_mov_b32_e32 v6, s9
; GCN-NEXT: s_mov_b64 s[10:11], 4
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: v_mov_b32_e32 v7, s10
; GCN-NEXT: v_mov_b32_e32 v8, s11
; GCN-NEXT: s_mov_b64 s[12:13], 5
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: s_mov_b64 s[14:15], 6
; GCN-NEXT: v_mov_b32_e32 v9, s12
; GCN-NEXT: v_mov_b32_e32 v10, s13
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: s_mov_b64 s[16:17], 7
; GCN-NEXT: v_mov_b32_e32 v11, s14
; GCN-NEXT: v_mov_b32_e32 v12, s15
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: s_mov_b64 s[18:19], 8
; GCN-NEXT: v_mov_b32_e32 v13, s16
; GCN-NEXT: v_mov_b32_e32 v14, s17
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT: v_mov_b32_e32 v15, s18
; GCN-NEXT: v_mov_b32_e32 v16, s19
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8i64_const_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b64 s[6:7], 2
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: v_mov_b32_e32 v1, s6
; GFX10-NEXT: v_mov_b32_e32 v2, s7
; GFX10-NEXT: s_mov_b64 s[4:5], 1
; GFX10-NEXT: s_mov_b64 s[8:9], 3
; GFX10-NEXT: s_mov_b64 s[10:11], 4
; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v2, s5, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT: s_mov_b64 s[12:13], 5
; GFX10-NEXT: s_mov_b64 s[14:15], 6
; GFX10-NEXT: s_mov_b64 s[16:17], 7
; GFX10-NEXT: s_mov_b64 s[18:19], 8
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s16, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s17, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s18, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s19, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: dyn_extract_v8i64_const_s_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b64 s[2:3], 2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
; GFX11-NEXT: s_mov_b64 s[0:1], 1
; GFX11-NEXT: s_mov_b64 s[4:5], 3
; GFX11-NEXT: s_mov_b64 s[6:7], 4
; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX11-NEXT: s_mov_b64 s[8:9], 5
; GFX11-NEXT: s_mov_b64 s[10:11], 6
; GFX11-NEXT: s_mov_b64 s[12:13], 7
; GFX11-NEXT: s_mov_b64 s[14:15], 8
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  ret i64 %ext
}

; Constant <8 x i64> vector, index passed `inreg` (s2). The extracted value
; is stored through an undef addrspace(1) pointer, so the store address in
; the expected code is not meaningful. All four targets expect the
; constants materialized in s[4:19] followed by one s_movrels_b64 indexed
; through m0.
define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
; GPRIDX-NEXT: s_mov_b32 m0, s2
; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b64 s[4:5], 1
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: s_mov_b64 s[18:19], 8
; MOVREL-NEXT: s_mov_b64 s[16:17], 7
; MOVREL-NEXT: s_mov_b64 s[14:15], 6
; MOVREL-NEXT: s_mov_b64 s[12:13], 5
; MOVREL-NEXT: s_mov_b64 s[10:11], 4
; MOVREL-NEXT: s_mov_b64 s[8:9], 3
; MOVREL-NEXT: s_mov_b64 s[6:7], 2
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_const_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b64 s[4:5], 1
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: s_mov_b64 s[18:19], 8
; GFX10-NEXT: s_mov_b64 s[16:17], 7
; GFX10-NEXT: s_mov_b64 s[14:15], 6
; GFX10-NEXT: s_mov_b64 s[12:13], 5
; GFX10-NEXT: s_mov_b64 s[10:11], 4
; GFX10-NEXT: s_mov_b64 s[8:9], 3
; GFX10-NEXT: s_mov_b64 s[6:7], 2
; GFX10-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: dyn_extract_v8i64_const_s_s:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_mov_b64 s[4:5], 1
; GFX11-NEXT: s_mov_b32 m0, s2
; GFX11-NEXT: s_mov_b64 s[18:19], 8
; GFX11-NEXT: s_mov_b64 s[16:17], 7
; GFX11-NEXT: s_mov_b64 s[14:15], 6
; GFX11-NEXT: s_mov_b64 s[12:13], 5
; GFX11-NEXT: s_mov_b64 s[10:11], 4
; GFX11-NEXT: s_mov_b64 s[8:9], 3
; GFX11-NEXT: s_mov_b64 s[6:7], 2
; GFX11-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

; <8 x i64> vector passed `inreg` (s2..s17 in the expected code), index
; passed as a plain i32 argument (v0). The result is stored through an
; undef addrspace(1) pointer. All targets expect a select chain with two
; v_cndmask_b32 per index compare (one per 32-bit half).
define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: v_mov_b32_e32 v1, s0
; GPRIDX-NEXT: v_mov_b32_e32 v2, s1
; GPRIDX-NEXT: v_mov_b32_e32 v3, s2
; GPRIDX-NEXT: v_mov_b32_e32 v4, s3
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: v_mov_b32_e32 v5, s4
; GPRIDX-NEXT: v_mov_b32_e32 v6, s5
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: v_mov_b32_e32 v7, s6
; GPRIDX-NEXT: v_mov_b32_e32 v8, s7
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: v_mov_b32_e32 v9, s8
; GPRIDX-NEXT: v_mov_b32_e32 v10, s9
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GPRIDX-NEXT: v_mov_b32_e32 v11, s10
; GPRIDX-NEXT: v_mov_b32_e32 v12, s11
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GPRIDX-NEXT: v_mov_b32_e32 v13, s14
; GPRIDX-NEXT: v_mov_b32_e32 v14, s15
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; GPRIDX-NEXT: v_mov_b32_e32 v15, s16
; GPRIDX-NEXT: v_mov_b32_e32 v16, s17
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: v_mov_b32_e32 v1, s0
; MOVREL-NEXT: v_mov_b32_e32 v2, s1
; MOVREL-NEXT: v_mov_b32_e32 v3, s2
; MOVREL-NEXT: v_mov_b32_e32 v4, s3
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: v_mov_b32_e32 v5, s4
; MOVREL-NEXT: v_mov_b32_e32 v6, s5
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: v_mov_b32_e32 v7, s6
; MOVREL-NEXT: v_mov_b32_e32 v8, s7
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: v_mov_b32_e32 v9, s8
; MOVREL-NEXT: v_mov_b32_e32 v10, s9
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; MOVREL-NEXT: v_mov_b32_e32 v11, s10
; MOVREL-NEXT: v_mov_b32_e32 v12, s11
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; MOVREL-NEXT: v_mov_b32_e32 v13, s14
; MOVREL-NEXT: v_mov_b32_e32 v14, s15
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; MOVREL-NEXT: v_mov_b32_e32 v15, s16
; MOVREL-NEXT: v_mov_b32_e32 v16, s17
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s19, s5
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: v_mov_b32_e32 v2, s19
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: s_mov_b32 s12, s14
; GFX10-NEXT: s_mov_b32 s13, s15
; GFX10-NEXT: s_mov_b32 s14, s16
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT: s_mov_b32 s15, s17
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: dyn_extract_v8i64_s_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: s_mov_b32 s19, s5
; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s19
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: s_mov_b32 s4, s6
; GFX11-NEXT: s_mov_b32 s5, s7
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: s_mov_b32 s8, s10
; GFX11-NEXT: s_mov_b32 s9, s11
; GFX11-NEXT: s_mov_b32 s10, s12
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX11-NEXT: s_mov_b32 s11, s13
; GFX11-NEXT: s_mov_b32 s12, s14
; GFX11-NEXT: s_mov_b32 s13, s15
; GFX11-NEXT: s_mov_b32 s14, s16
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX11-NEXT: s_mov_b32 s15, s17
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

; <8 x i64> vector and index both passed as plain arguments (v0..v15 and
; v16 in the expected code). All targets expect a select chain over both
; 32-bit halves; gfx1100 expects each pair fused into one
; v_dual_cndmask_b32.
define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8i64_v_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8i64_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: dyn_extract_v8i64_v_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  ret i64 %ext
}

; <8 x i64> vector passed as a plain argument (v0..v15), index passed
; `inreg` (s2). The index is shifted left by one before use on every
; target. gfx900 expects two v_mov_b32 bracketed by s_set_gpr_idx_on /
; s_set_gpr_idx_off; the other targets expect the shifted index in m0 and
; two v_movrels_b32. The result is stored through an undef addrspace(1)
; pointer.
define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
; GPRIDX-NEXT: v_mov_b32_e32 v17, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
; MOVREL-NEXT: v_movrels_b32_e32 v17, v1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17]
; MOVREL-NEXT: s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_v_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_lshl_b32 m0, s2, 1
; GFX10-NEXT: v_movrels_b32_e32 v16, v0
; GFX10-NEXT: v_movrels_b32_e32 v17, v1
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: dyn_extract_v8i64_v_s:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_lshl_b32 m0, s2, 1
; GFX11-NEXT: v_movrels_b32_e32 v16, v0
; GFX11-NEXT: v_movrels_b32_e32 v17, v1
; GFX11-NEXT: global_store_b64 v[0:1], v[16:17], off
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 m0, s18
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: s_mov_b32 s12, s14
; GFX10-NEXT: s_mov_b32 s13, s15
; GFX10-NEXT: s_mov_b32 s14, s16
; GFX10-NEXT: s_mov_b32 s15, s17
; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1]
899; GFX10-NEXT: v_mov_b32_e32 v0, s0 900; GFX10-NEXT: v_mov_b32_e32 v1, s1 901; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 902; GFX10-NEXT: s_endpgm 903; 904; GFX11-LABEL: dyn_extract_v8i64_s_s: 905; GFX11: ; %bb.0: ; %entry 906; GFX11-NEXT: s_mov_b32 s0, s2 907; GFX11-NEXT: s_mov_b32 s1, s3 908; GFX11-NEXT: s_mov_b32 m0, s18 909; GFX11-NEXT: s_mov_b32 s2, s4 910; GFX11-NEXT: s_mov_b32 s3, s5 911; GFX11-NEXT: s_mov_b32 s4, s6 912; GFX11-NEXT: s_mov_b32 s5, s7 913; GFX11-NEXT: s_mov_b32 s6, s8 914; GFX11-NEXT: s_mov_b32 s7, s9 915; GFX11-NEXT: s_mov_b32 s8, s10 916; GFX11-NEXT: s_mov_b32 s9, s11 917; GFX11-NEXT: s_mov_b32 s10, s12 918; GFX11-NEXT: s_mov_b32 s11, s13 919; GFX11-NEXT: s_mov_b32 s12, s14 920; GFX11-NEXT: s_mov_b32 s13, s15 921; GFX11-NEXT: s_mov_b32 s14, s16 922; GFX11-NEXT: s_mov_b32 s15, s17 923; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1] 924; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 925; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off 926; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 927; GFX11-NEXT: s_endpgm 928entry: 929 %ext = extractelement <8 x i64> %vec, i32 %sel 930 store i64 %ext, i64 addrspace(1)* undef 931 ret void 932} 933 934define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) { 935; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3: 936; GPRIDX: ; %bb.0: ; %entry 937; GPRIDX-NEXT: s_add_i32 s10, s10, 3 938; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 939; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 940; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 941; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 942; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 943; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 944; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 945; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 946; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 947; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 948; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 949; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 950; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 951; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0 952; 
GPRIDX-NEXT: v_mov_b32_e32 v0, s0 953; GPRIDX-NEXT: ; return to shader part epilog 954; 955; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3: 956; MOVREL: ; %bb.0: ; %entry 957; MOVREL-NEXT: s_mov_b32 s0, s2 958; MOVREL-NEXT: s_mov_b32 s1, s3 959; MOVREL-NEXT: s_mov_b32 s3, s5 960; MOVREL-NEXT: s_mov_b32 m0, s10 961; MOVREL-NEXT: s_mov_b32 s2, s4 962; MOVREL-NEXT: s_mov_b32 s4, s6 963; MOVREL-NEXT: s_mov_b32 s5, s7 964; MOVREL-NEXT: s_mov_b32 s6, s8 965; MOVREL-NEXT: s_mov_b32 s7, s9 966; MOVREL-NEXT: s_movrels_b32 s0, s3 967; MOVREL-NEXT: v_mov_b32_e32 v0, s0 968; MOVREL-NEXT: ; return to shader part epilog 969; 970; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s_offset3: 971; GFX10PLUS: ; %bb.0: ; %entry 972; GFX10PLUS-NEXT: s_mov_b32 s1, s3 973; GFX10PLUS-NEXT: s_mov_b32 s3, s5 974; GFX10PLUS-NEXT: s_mov_b32 m0, s10 975; GFX10PLUS-NEXT: s_mov_b32 s0, s2 976; GFX10PLUS-NEXT: s_mov_b32 s2, s4 977; GFX10PLUS-NEXT: s_mov_b32 s4, s6 978; GFX10PLUS-NEXT: s_mov_b32 s5, s7 979; GFX10PLUS-NEXT: s_mov_b32 s6, s8 980; GFX10PLUS-NEXT: s_mov_b32 s7, s9 981; GFX10PLUS-NEXT: s_movrels_b32 s0, s3 982; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 983; GFX10PLUS-NEXT: ; return to shader part epilog 984entry: 985 %add = add i32 %sel, 3 986 %ext = extractelement <8 x float> %vec, i32 %add 987 ret float %ext 988} 989 990define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) { 991; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3: 992; GPRIDX: ; %bb.0: ; %entry 993; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 994; GPRIDX-NEXT: v_add_u32_e32 v8, 3, v8 995; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 996; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 997; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 998; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 999; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 1000; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1001; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 1002; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1003; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, 
v8 1004; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 1005; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 1006; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1007; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 1008; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 1009; GPRIDX-NEXT: s_setpc_b64 s[30:31] 1010; 1011; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3: 1012; MOVREL: ; %bb.0: ; %entry 1013; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1014; MOVREL-NEXT: v_add_u32_e32 v8, vcc, 3, v8 1015; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 1016; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1017; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 1018; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1019; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 1020; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1021; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 1022; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1023; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 1024; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 1025; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 1026; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1027; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 1028; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 1029; MOVREL-NEXT: s_setpc_b64 s[30:31] 1030; 1031; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v_offset3: 1032; GFX10PLUS: ; %bb.0: ; %entry 1033; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1034; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 1035; GFX10PLUS-NEXT: v_add_nc_u32_e32 v8, 3, v8 1036; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 1037; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1038; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 1039; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1040; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 1041; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 1042; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 1043; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 1044; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 1045; 
GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 1046; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 1047; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 1048; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8 1049; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 1050; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1051entry: 1052 %add = add i32 %sel, 3 1053 %ext = extractelement <8 x float> %vec, i32 %add 1054 ret float %ext 1055} 1056 1057define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) { 1058; GCN-LABEL: dyn_extract_v8f64_s_s_offset1: 1059; GCN: ; %bb.0: ; %entry 1060; GCN-NEXT: s_mov_b32 s0, s2 1061; GCN-NEXT: s_mov_b32 s1, s3 1062; GCN-NEXT: s_mov_b32 s2, s4 1063; GCN-NEXT: s_mov_b32 s3, s5 1064; GCN-NEXT: s_mov_b32 m0, s18 1065; GCN-NEXT: s_mov_b32 s4, s6 1066; GCN-NEXT: s_mov_b32 s5, s7 1067; GCN-NEXT: s_mov_b32 s6, s8 1068; GCN-NEXT: s_mov_b32 s7, s9 1069; GCN-NEXT: s_mov_b32 s8, s10 1070; GCN-NEXT: s_mov_b32 s9, s11 1071; GCN-NEXT: s_mov_b32 s10, s12 1072; GCN-NEXT: s_mov_b32 s11, s13 1073; GCN-NEXT: s_mov_b32 s12, s14 1074; GCN-NEXT: s_mov_b32 s13, s15 1075; GCN-NEXT: s_mov_b32 s14, s16 1076; GCN-NEXT: s_mov_b32 s15, s17 1077; GCN-NEXT: s_movrels_b64 s[0:1], s[2:3] 1078; GCN-NEXT: ; return to shader part epilog 1079; 1080; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset1: 1081; GFX10PLUS: ; %bb.0: ; %entry 1082; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1083; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1084; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1085; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1086; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1087; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1088; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1089; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1090; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1091; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1092; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1093; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1094; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1095; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1096; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1097; 
GFX10PLUS-NEXT: s_mov_b32 s14, s16 1098; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1099; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[2:3] 1100; GFX10PLUS-NEXT: ; return to shader part epilog 1101entry: 1102 %add = add i32 %sel, 1 1103 %ext = extractelement <8 x double> %vec, i32 %add 1104 ret double %ext 1105} 1106 1107define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) { 1108; GCN-LABEL: dyn_extract_v8f64_s_s_offset2: 1109; GCN: ; %bb.0: ; %entry 1110; GCN-NEXT: s_mov_b32 s0, s2 1111; GCN-NEXT: s_mov_b32 s1, s3 1112; GCN-NEXT: s_mov_b32 s2, s4 1113; GCN-NEXT: s_mov_b32 s3, s5 1114; GCN-NEXT: s_mov_b32 s4, s6 1115; GCN-NEXT: s_mov_b32 s5, s7 1116; GCN-NEXT: s_mov_b32 m0, s18 1117; GCN-NEXT: s_mov_b32 s6, s8 1118; GCN-NEXT: s_mov_b32 s7, s9 1119; GCN-NEXT: s_mov_b32 s8, s10 1120; GCN-NEXT: s_mov_b32 s9, s11 1121; GCN-NEXT: s_mov_b32 s10, s12 1122; GCN-NEXT: s_mov_b32 s11, s13 1123; GCN-NEXT: s_mov_b32 s12, s14 1124; GCN-NEXT: s_mov_b32 s13, s15 1125; GCN-NEXT: s_mov_b32 s14, s16 1126; GCN-NEXT: s_mov_b32 s15, s17 1127; GCN-NEXT: s_movrels_b64 s[0:1], s[4:5] 1128; GCN-NEXT: ; return to shader part epilog 1129; 1130; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset2: 1131; GFX10PLUS: ; %bb.0: ; %entry 1132; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1133; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1134; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1135; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1136; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1137; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1138; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1139; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1140; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1141; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1142; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1143; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1144; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1145; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1146; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1147; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1148; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1149; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[4:5] 1150; GFX10PLUS-NEXT: 
; return to shader part epilog 1151entry: 1152 %add = add i32 %sel, 2 1153 %ext = extractelement <8 x double> %vec, i32 %add 1154 ret double %ext 1155} 1156 1157define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) { 1158; GCN-LABEL: dyn_extract_v8f64_s_s_offset3: 1159; GCN: ; %bb.0: ; %entry 1160; GCN-NEXT: s_mov_b32 s0, s2 1161; GCN-NEXT: s_mov_b32 s1, s3 1162; GCN-NEXT: s_mov_b32 s2, s4 1163; GCN-NEXT: s_mov_b32 s3, s5 1164; GCN-NEXT: s_mov_b32 s4, s6 1165; GCN-NEXT: s_mov_b32 s5, s7 1166; GCN-NEXT: s_mov_b32 s6, s8 1167; GCN-NEXT: s_mov_b32 s7, s9 1168; GCN-NEXT: s_mov_b32 m0, s18 1169; GCN-NEXT: s_mov_b32 s8, s10 1170; GCN-NEXT: s_mov_b32 s9, s11 1171; GCN-NEXT: s_mov_b32 s10, s12 1172; GCN-NEXT: s_mov_b32 s11, s13 1173; GCN-NEXT: s_mov_b32 s12, s14 1174; GCN-NEXT: s_mov_b32 s13, s15 1175; GCN-NEXT: s_mov_b32 s14, s16 1176; GCN-NEXT: s_mov_b32 s15, s17 1177; GCN-NEXT: s_movrels_b64 s[0:1], s[6:7] 1178; GCN-NEXT: ; return to shader part epilog 1179; 1180; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset3: 1181; GFX10PLUS: ; %bb.0: ; %entry 1182; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1183; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1184; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1185; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1186; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1187; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1188; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1189; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1190; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1191; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1192; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1193; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1194; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1195; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1196; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1197; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1198; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1199; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[6:7] 1200; GFX10PLUS-NEXT: ; return to shader part epilog 1201entry: 1202 %add = add i32 %sel, 3 1203 %ext = extractelement <8 x double> %vec, i32 %add 1204 ret double %ext 
1205} 1206 1207define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { 1208; GCN-LABEL: dyn_extract_v8f64_s_s_offset4: 1209; GCN: ; %bb.0: ; %entry 1210; GCN-NEXT: s_mov_b32 s0, s2 1211; GCN-NEXT: s_mov_b32 s1, s3 1212; GCN-NEXT: s_mov_b32 s2, s4 1213; GCN-NEXT: s_mov_b32 s3, s5 1214; GCN-NEXT: s_mov_b32 s4, s6 1215; GCN-NEXT: s_mov_b32 s5, s7 1216; GCN-NEXT: s_mov_b32 s6, s8 1217; GCN-NEXT: s_mov_b32 s7, s9 1218; GCN-NEXT: s_mov_b32 s8, s10 1219; GCN-NEXT: s_mov_b32 s9, s11 1220; GCN-NEXT: s_mov_b32 m0, s18 1221; GCN-NEXT: s_mov_b32 s10, s12 1222; GCN-NEXT: s_mov_b32 s11, s13 1223; GCN-NEXT: s_mov_b32 s12, s14 1224; GCN-NEXT: s_mov_b32 s13, s15 1225; GCN-NEXT: s_mov_b32 s14, s16 1226; GCN-NEXT: s_mov_b32 s15, s17 1227; GCN-NEXT: s_movrels_b64 s[0:1], s[8:9] 1228; GCN-NEXT: ; return to shader part epilog 1229; 1230; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset4: 1231; GFX10PLUS: ; %bb.0: ; %entry 1232; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1233; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1234; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1235; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1236; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1237; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1238; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1239; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1240; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1241; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1242; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1243; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1244; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1245; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1246; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1247; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1248; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1249; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[8:9] 1250; GFX10PLUS-NEXT: ; return to shader part epilog 1251entry: 1252 %add = add i32 %sel, 4 1253 %ext = extractelement <8 x double> %vec, i32 %add 1254 ret double %ext 1255} 1256 1257define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) { 1258; GCN-LABEL: 
dyn_extract_v8f64_s_s_offset5: 1259; GCN: ; %bb.0: ; %entry 1260; GCN-NEXT: s_mov_b32 s0, s2 1261; GCN-NEXT: s_mov_b32 s1, s3 1262; GCN-NEXT: s_mov_b32 s2, s4 1263; GCN-NEXT: s_mov_b32 s3, s5 1264; GCN-NEXT: s_mov_b32 s4, s6 1265; GCN-NEXT: s_mov_b32 s5, s7 1266; GCN-NEXT: s_mov_b32 s6, s8 1267; GCN-NEXT: s_mov_b32 s7, s9 1268; GCN-NEXT: s_mov_b32 s8, s10 1269; GCN-NEXT: s_mov_b32 s9, s11 1270; GCN-NEXT: s_mov_b32 s10, s12 1271; GCN-NEXT: s_mov_b32 s11, s13 1272; GCN-NEXT: s_mov_b32 m0, s18 1273; GCN-NEXT: s_mov_b32 s12, s14 1274; GCN-NEXT: s_mov_b32 s13, s15 1275; GCN-NEXT: s_mov_b32 s14, s16 1276; GCN-NEXT: s_mov_b32 s15, s17 1277; GCN-NEXT: s_movrels_b64 s[0:1], s[10:11] 1278; GCN-NEXT: ; return to shader part epilog 1279; 1280; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset5: 1281; GFX10PLUS: ; %bb.0: ; %entry 1282; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1283; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1284; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1285; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1286; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1287; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1288; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1289; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1290; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1291; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1292; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1293; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1294; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1295; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1296; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1297; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1298; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1299; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[10:11] 1300; GFX10PLUS-NEXT: ; return to shader part epilog 1301entry: 1302 %add = add i32 %sel, 5 1303 %ext = extractelement <8 x double> %vec, i32 %add 1304 ret double %ext 1305} 1306 1307define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { 1308; GCN-LABEL: dyn_extract_v8f64_s_s_offset6: 1309; GCN: ; %bb.0: ; %entry 1310; GCN-NEXT: s_mov_b32 s0, s2 1311; GCN-NEXT: s_mov_b32 s1, s3 1312; GCN-NEXT: 
s_mov_b32 s2, s4 1313; GCN-NEXT: s_mov_b32 s3, s5 1314; GCN-NEXT: s_mov_b32 s4, s6 1315; GCN-NEXT: s_mov_b32 s5, s7 1316; GCN-NEXT: s_mov_b32 s6, s8 1317; GCN-NEXT: s_mov_b32 s7, s9 1318; GCN-NEXT: s_mov_b32 s8, s10 1319; GCN-NEXT: s_mov_b32 s9, s11 1320; GCN-NEXT: s_mov_b32 s10, s12 1321; GCN-NEXT: s_mov_b32 s11, s13 1322; GCN-NEXT: s_mov_b32 s12, s14 1323; GCN-NEXT: s_mov_b32 s13, s15 1324; GCN-NEXT: s_mov_b32 m0, s18 1325; GCN-NEXT: s_mov_b32 s14, s16 1326; GCN-NEXT: s_mov_b32 s15, s17 1327; GCN-NEXT: s_movrels_b64 s[0:1], s[12:13] 1328; GCN-NEXT: ; return to shader part epilog 1329; 1330; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset6: 1331; GFX10PLUS: ; %bb.0: ; %entry 1332; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1333; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1334; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1335; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1336; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1337; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1338; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1339; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1340; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1341; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1342; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1343; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1344; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1345; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1346; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1347; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1348; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1349; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[12:13] 1350; GFX10PLUS-NEXT: ; return to shader part epilog 1351entry: 1352 %add = add i32 %sel, 6 1353 %ext = extractelement <8 x double> %vec, i32 %add 1354 ret double %ext 1355} 1356 1357define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { 1358; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: 1359; GPRIDX: ; %bb.0: ; %entry 1360; GPRIDX-NEXT: s_mov_b32 s0, s2 1361; GPRIDX-NEXT: s_mov_b32 s1, s3 1362; GPRIDX-NEXT: s_mov_b32 s2, s4 1363; GPRIDX-NEXT: s_mov_b32 s3, s5 1364; GPRIDX-NEXT: s_mov_b32 s4, s6 1365; GPRIDX-NEXT: s_mov_b32 s5, s7 1366; 
GPRIDX-NEXT: s_mov_b32 s6, s8 1367; GPRIDX-NEXT: s_mov_b32 s7, s9 1368; GPRIDX-NEXT: s_mov_b32 s8, s10 1369; GPRIDX-NEXT: s_mov_b32 s9, s11 1370; GPRIDX-NEXT: s_mov_b32 s10, s12 1371; GPRIDX-NEXT: s_mov_b32 s11, s13 1372; GPRIDX-NEXT: s_mov_b32 s12, s14 1373; GPRIDX-NEXT: s_mov_b32 s13, s15 1374; GPRIDX-NEXT: s_mov_b32 s14, s16 1375; GPRIDX-NEXT: s_mov_b32 s15, s17 1376; GPRIDX-NEXT: s_mov_b32 m0, s18 1377; GPRIDX-NEXT: s_nop 0 1378; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15] 1379; GPRIDX-NEXT: ; return to shader part epilog 1380; 1381; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: 1382; MOVREL: ; %bb.0: ; %entry 1383; MOVREL-NEXT: s_mov_b32 s0, s2 1384; MOVREL-NEXT: s_mov_b32 s1, s3 1385; MOVREL-NEXT: s_mov_b32 s2, s4 1386; MOVREL-NEXT: s_mov_b32 s3, s5 1387; MOVREL-NEXT: s_mov_b32 s4, s6 1388; MOVREL-NEXT: s_mov_b32 s5, s7 1389; MOVREL-NEXT: s_mov_b32 s6, s8 1390; MOVREL-NEXT: s_mov_b32 s7, s9 1391; MOVREL-NEXT: s_mov_b32 s8, s10 1392; MOVREL-NEXT: s_mov_b32 s9, s11 1393; MOVREL-NEXT: s_mov_b32 s10, s12 1394; MOVREL-NEXT: s_mov_b32 s11, s13 1395; MOVREL-NEXT: s_mov_b32 s12, s14 1396; MOVREL-NEXT: s_mov_b32 s13, s15 1397; MOVREL-NEXT: s_mov_b32 s14, s16 1398; MOVREL-NEXT: s_mov_b32 s15, s17 1399; MOVREL-NEXT: s_mov_b32 m0, s18 1400; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15] 1401; MOVREL-NEXT: ; return to shader part epilog 1402; 1403; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset7: 1404; GFX10PLUS: ; %bb.0: ; %entry 1405; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1406; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1407; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1408; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1409; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1410; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1411; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1412; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1413; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1414; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1415; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1416; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1417; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1418; GFX10PLUS-NEXT: s_mov_b32 s13, s15 
1419; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1420; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1421; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1422; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[14:15] 1423; GFX10PLUS-NEXT: ; return to shader part epilog 1424entry: 1425 %add = add i32 %sel, 7 1426 %ext = extractelement <8 x double> %vec, i32 %add 1427 ret double %ext 1428} 1429 1430define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) { 1431; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1: 1432; GCN: ; %bb.0: ; %entry 1433; GCN-NEXT: s_mov_b32 s0, s2 1434; GCN-NEXT: s_mov_b32 s1, s3 1435; GCN-NEXT: s_add_i32 m0, s18, -1 1436; GCN-NEXT: s_mov_b32 s2, s4 1437; GCN-NEXT: s_mov_b32 s3, s5 1438; GCN-NEXT: s_mov_b32 s4, s6 1439; GCN-NEXT: s_mov_b32 s5, s7 1440; GCN-NEXT: s_mov_b32 s6, s8 1441; GCN-NEXT: s_mov_b32 s7, s9 1442; GCN-NEXT: s_mov_b32 s8, s10 1443; GCN-NEXT: s_mov_b32 s9, s11 1444; GCN-NEXT: s_mov_b32 s10, s12 1445; GCN-NEXT: s_mov_b32 s11, s13 1446; GCN-NEXT: s_mov_b32 s12, s14 1447; GCN-NEXT: s_mov_b32 s13, s15 1448; GCN-NEXT: s_mov_b32 s14, s16 1449; GCN-NEXT: s_mov_b32 s15, s17 1450; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] 1451; GCN-NEXT: ; return to shader part epilog 1452; 1453; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offsetm1: 1454; GFX10PLUS: ; %bb.0: ; %entry 1455; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1456; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1457; GFX10PLUS-NEXT: s_add_i32 m0, s18, -1 1458; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1459; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1460; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1461; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1462; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1463; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1464; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1465; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1466; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1467; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1468; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1469; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1470; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1471; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1472; 
GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1] 1473; GFX10PLUS-NEXT: ; return to shader part epilog 1474entry: 1475 %add = add i32 %sel, -1 1476 %ext = extractelement <8 x double> %vec, i32 %add 1477 ret double %ext 1478} 1479 1480define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) { 1481; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3: 1482; GPRIDX: ; %bb.0: ; %entry 1483; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1484; GPRIDX-NEXT: v_add_u32_e32 v16, 3, v16 1485; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1486; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1487; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1488; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 1489; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1490; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1491; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1492; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1493; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1494; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 1495; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 1496; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 1497; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 1498; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 1499; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 1500; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1501; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 1502; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 1503; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1504; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 1505; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 1506; GPRIDX-NEXT: s_setpc_b64 s[30:31] 1507; 1508; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3: 1509; MOVREL: ; %bb.0: ; %entry 1510; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1511; MOVREL-NEXT: v_add_u32_e32 v16, vcc, 3, v16 1512; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1513; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1514; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1515; MOVREL-NEXT: 
v_cmp_eq_u32_e32 vcc, 2, v16 1516; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1517; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1518; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1519; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1520; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1521; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 1522; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 1523; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 1524; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 1525; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 1526; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 1527; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1528; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 1529; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 1530; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1531; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 1532; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 1533; MOVREL-NEXT: s_setpc_b64 s[30:31] 1534; 1535; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3: 1536; GFX10: ; %bb.0: ; %entry 1537; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1538; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1539; GFX10-NEXT: v_add_nc_u32_e32 v16, 3, v16 1540; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 1541; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1542; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 1543; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1544; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 1545; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 1546; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 1547; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 1548; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 1549; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1550; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 1551; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 1552; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 1553; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 1554; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 1555; 
GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1556; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 1557; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 1558; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 1559; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 1560; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo 1561; GFX10-NEXT: s_setpc_b64 s[30:31] 1562; 1563; GFX11-LABEL: dyn_extract_v8f64_v_v_offset3: 1564; GFX11: ; %bb.0: ; %entry 1565; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1566; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1567; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v16 1568; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 1569; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2 1570; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1571; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4 1572; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 1573; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6 1574; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1575; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8 1576; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 1577; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v11 :: v_dual_cndmask_b32 v0, v0, v10 1578; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1579; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v13 :: v_dual_cndmask_b32 v0, v0, v12 1580; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 1581; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v15 :: v_dual_cndmask_b32 v0, v0, v14 1582; GFX11-NEXT: s_setpc_b64 s[30:31] 1583entry: 1584 %add = add i32 %sel, 3 1585 %ext = extractelement <8 x double> %vec, i32 %add 1586 ret double %ext 1587} 1588 1589define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) { 1590; GCN-LABEL: dyn_extract_v8p3_v_v: 1591; GCN: ; %bb.0: ; %entry 1592; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1593; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 1594; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1595; 
GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: dyn_extract_v8p3_v_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
  ret i8 addrspace(3)* %ext
}

; Dynamic extractelement of an addrspace(3) pointer from an 8-element vector
; where both the vector and the index are inreg arguments. The extracted value
; is stored through an undef addrspace(3) pointer so the result stays live.
; Expected code is either an s_cmp_eq_u32/s_cselect_b32 chain over indices 1-7
; or an m0-relative s_movrels_b32, depending on the run line.
define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_extract_v8p3_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1
; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2
; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3
; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4
; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5
; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6
; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0
; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7
; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ds_write_b32 v0, v0
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8p3_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movrels_b32 s0, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: s_mov_b32 m0, -1
; MOVREL-NEXT: ds_write_b32 v0, v0
; MOVREL-NEXT: s_endpgm
;
; GFX10-LABEL: dyn_extract_v8p3_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 m0, s10
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_movrels_b32 s0, s0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: ds_write_b32 v0, v0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: dyn_extract_v8p3_s_s:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 m0, s10
; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
; GFX11-NEXT: s_mov_b32 s5, s7
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: s_movrels_b32 s0, s0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: ds_store_b32 v0, v0
; GFX11-NEXT: s_endpgm
entry:
  %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
  store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef
  ret void
}

; Dynamic extractelement of an addrspace(1) pointer from an 8-element vector
; with the vector and the index passed as ordinary (non-inreg) arguments.
; Expected code is a v_cmp_eq_u32 chain over indices 1-7 that selects both
; result registers (v0 and v1) per step; the gfx1100 run pairs the two selects
; into v_dual_cndmask_b32.
define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) {
; GCN-LABEL: dyn_extract_v8p1_v_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8p1_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: dyn_extract_v8p1_v_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
  ret i8 addrspace(1)* %ext
}

; Dynamic extractelement of an addrspace(1) pointer from an 8-element vector
; with both the vector and the index inreg. Expected code copies the vector
; into s[0:15], loads m0 from the index register (s18) and reads the element
; with s_movrels_b64; the result is stored through an undef addrspace(1)
; pointer.
define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_extract_v8p1_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8p1_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
;
; GFX10-LABEL: dyn_extract_v8p1_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 m0, s18
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: s_mov_b32 s12, s14
; GFX10-NEXT: s_mov_b32 s13, s15
; GFX10-NEXT: s_mov_b32 s14, s16
; GFX10-NEXT: s_mov_b32 s15, s17
; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: dyn_extract_v8p1_s_s:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: s_mov_b32 m0, s18
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
; GFX11-NEXT: s_mov_b32 s5, s7
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: s_mov_b32 s8, s10
; GFX11-NEXT: s_mov_b32 s9, s11
; GFX11-NEXT: s_mov_b32 s10, s12
; GFX11-NEXT: s_mov_b32 s11, s13
; GFX11-NEXT: s_mov_b32 s12, s14
; GFX11-NEXT: s_mov_b32 s13, s15
; GFX11-NEXT: s_mov_b32 s14, s16
; GFX11-NEXT: s_mov_b32 s15, s17
; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
  %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
  store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef
  ret void
}

; Dynamic extractelement from a non-inreg <16 x float> vector with an inreg
; index. Expected code is a single indexed VGPR read: s_set_gpr_idx_on with
; v_mov_b32 on the gfx900 run, m0 plus v_movrels_b32 on the other runs.
define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v16f32_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v16f32_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
; MOVREL-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v16f32_v_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_mov_b32 m0, s2
; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <16 x float> %vec, i32 %sel
  ret float %ext
}

; Same as dyn_extract_v16f32_v_s but with a <32 x float> source vector; the
; expected instruction sequences are identical.
define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v32f32_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v32f32_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
; MOVREL-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v32f32_v_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_mov_b32 m0, s2
; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <32 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extractelement from a non-inreg <16 x double> vector with an inreg
; index. Expected code shifts the index left by one, performs two indexed
; 32-bit reads (base registers v0 and v1), and moves both results to s0/s1
; with v_readfirstlane_b32.
define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v16f64_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v32, v0
; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: v_readfirstlane_b32 s0, v32
; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v16f64_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
; MOVREL-NEXT: v_movrels_b32_e32 v32, v0
; MOVREL-NEXT: v_movrels_b32_e32 v0, v1
; MOVREL-NEXT: v_readfirstlane_b32 s0, v32
; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
; MOVREL-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v16f64_v_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1
; GFX10PLUS-NEXT: v_movrels_b32_e32 v32, v0
; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1
; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v32
; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <16 x double> %vec, i32 %sel
  ret double %ext
}

; Dynamic extractelement from the constant vector <1.0, ..., 16.0> with an
; inreg index. Expected code materializes the constants in s4..s19, loads m0
; from the index and reads the element with s_movrels_b32.
define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v16f32_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s4, 1.0
; GCN-NEXT: s_mov_b32 m0, s2
; GCN-NEXT: s_mov_b32 s19, 0x41800000
; GCN-NEXT: s_mov_b32 s18, 0x41700000
; GCN-NEXT: s_mov_b32 s17, 0x41600000
; GCN-NEXT: s_mov_b32 s16, 0x41500000
; GCN-NEXT: s_mov_b32 s15, 0x41400000
; GCN-NEXT: s_mov_b32 s14, 0x41300000
; GCN-NEXT: s_mov_b32 s13, 0x41200000
; GCN-NEXT: s_mov_b32 s12, 0x41100000
; GCN-NEXT: s_mov_b32 s11, 0x41000000
; GCN-NEXT: s_mov_b32 s10, 0x40e00000
; GCN-NEXT: s_mov_b32 s9, 0x40c00000
; GCN-NEXT: s_mov_b32 s8, 0x40a00000
; GCN-NEXT: s_mov_b32 s7, 4.0
; GCN-NEXT: s_mov_b32 s6, 0x40400000
; GCN-NEXT: s_mov_b32 s5, 2.0
; GCN-NEXT: s_movrels_b32 s0, s4
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v16f32_s_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0
; GFX10PLUS-NEXT: s_mov_b32 m0, s2
; GFX10PLUS-NEXT: s_mov_b32 s19, 0x41800000
; GFX10PLUS-NEXT: s_mov_b32 s18, 0x41700000
; GFX10PLUS-NEXT: s_mov_b32 s17, 0x41600000
; GFX10PLUS-NEXT: s_mov_b32 s16, 0x41500000
; GFX10PLUS-NEXT: s_mov_b32 s15, 0x41400000
; GFX10PLUS-NEXT: s_mov_b32 s14, 0x41300000
; GFX10PLUS-NEXT: s_mov_b32 s13, 0x41200000
; GFX10PLUS-NEXT: s_mov_b32 s12, 0x41100000
; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000
; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000
; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000
; GFX10PLUS-NEXT: s_mov_b32 s8, 0x40a00000
; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0
; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000
; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0
; GFX10PLUS-NEXT: s_movrels_b32 s0, s4
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
  ret float %ext
}

; Dynamic extractelement from the constant vector <1.0, ..., 32.0> with an
; inreg index. Expected code materializes the constants in s36..s67 and reads
; the element with an m0-relative s_movrels_b32.
define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v32f32_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s36, 1.0
; GCN-NEXT: s_mov_b32 m0, s2
; GCN-NEXT: s_mov_b32 s67, 0x42000000
; GCN-NEXT: s_mov_b32 s66, 0x41f80000
; GCN-NEXT: s_mov_b32 s65, 0x41f00000
; GCN-NEXT: s_mov_b32 s64, 0x41e80000
; GCN-NEXT: s_mov_b32 s63, 0x41e00000
; GCN-NEXT: s_mov_b32 s62, 0x41d80000
; GCN-NEXT: s_mov_b32 s61, 0x41d00000
; GCN-NEXT: s_mov_b32 s60, 0x41c80000
; GCN-NEXT: s_mov_b32 s59, 0x41c00000
; GCN-NEXT: s_mov_b32 s58, 0x41b80000
; GCN-NEXT: s_mov_b32 s57, 0x41b00000
; GCN-NEXT: s_mov_b32 s56, 0x41a80000
; GCN-NEXT: s_mov_b32 s55, 0x41a00000
; GCN-NEXT: s_mov_b32 s54, 0x41980000
; GCN-NEXT: s_mov_b32 s53, 0x41900000
; GCN-NEXT: s_mov_b32 s52, 0x41880000
; GCN-NEXT: s_mov_b32 s51, 0x41800000
; GCN-NEXT: s_mov_b32 s50, 0x41700000
; GCN-NEXT: s_mov_b32 s49, 0x41600000
; GCN-NEXT: s_mov_b32 s48, 0x41500000
; GCN-NEXT: s_mov_b32 s47, 0x41400000
; GCN-NEXT: s_mov_b32 s46, 0x41300000
; GCN-NEXT: s_mov_b32 s45, 0x41200000
; GCN-NEXT: s_mov_b32 s44, 0x41100000
; GCN-NEXT: s_mov_b32 s43, 0x41000000
; GCN-NEXT: s_mov_b32 s42, 0x40e00000
; GCN-NEXT: s_mov_b32 s41, 0x40c00000
; GCN-NEXT: s_mov_b32 s40, 0x40a00000
; GCN-NEXT: s_mov_b32 s39, 4.0
; GCN-NEXT: s_mov_b32 s38, 0x40400000
; GCN-NEXT: s_mov_b32 s37, 2.0
; GCN-NEXT: s_movrels_b32 s0, s36
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v32f32_s_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_mov_b32 s36, 1.0
; GFX10PLUS-NEXT: s_mov_b32 m0, s2
; GFX10PLUS-NEXT: s_mov_b32 s67, 0x42000000
; GFX10PLUS-NEXT: s_mov_b32 s66, 0x41f80000
; GFX10PLUS-NEXT: s_mov_b32 s65, 0x41f00000
; GFX10PLUS-NEXT: s_mov_b32 s64, 0x41e80000
; GFX10PLUS-NEXT: s_mov_b32 s63, 0x41e00000
; GFX10PLUS-NEXT: s_mov_b32 s62, 0x41d80000
; GFX10PLUS-NEXT: s_mov_b32 s61, 0x41d00000
; GFX10PLUS-NEXT: s_mov_b32 s60, 0x41c80000
; GFX10PLUS-NEXT: s_mov_b32 s59, 0x41c00000
; GFX10PLUS-NEXT: s_mov_b32 s58, 0x41b80000
; GFX10PLUS-NEXT: s_mov_b32 s57, 0x41b00000
; GFX10PLUS-NEXT: s_mov_b32 s56, 0x41a80000
; GFX10PLUS-NEXT: s_mov_b32 s55, 0x41a00000
; GFX10PLUS-NEXT: s_mov_b32 s54, 0x41980000
; GFX10PLUS-NEXT: s_mov_b32 s53, 0x41900000
; GFX10PLUS-NEXT: s_mov_b32 s52, 0x41880000
; GFX10PLUS-NEXT: s_mov_b32 s51, 0x41800000
; GFX10PLUS-NEXT: s_mov_b32 s50, 0x41700000
; GFX10PLUS-NEXT: s_mov_b32 s49, 0x41600000
; GFX10PLUS-NEXT: s_mov_b32 s48, 0x41500000
; GFX10PLUS-NEXT: s_mov_b32 s47, 0x41400000
; GFX10PLUS-NEXT: s_mov_b32 s46, 0x41300000
; GFX10PLUS-NEXT: s_mov_b32 s45, 0x41200000
; GFX10PLUS-NEXT: s_mov_b32 s44, 0x41100000
; GFX10PLUS-NEXT: s_mov_b32 s43, 0x41000000
; GFX10PLUS-NEXT: s_mov_b32 s42, 0x40e00000
; GFX10PLUS-NEXT: s_mov_b32 s41, 0x40c00000
; GFX10PLUS-NEXT: s_mov_b32 s40, 0x40a00000
; GFX10PLUS-NEXT: s_mov_b32 s39, 4.0
; GFX10PLUS-NEXT: s_mov_b32 s38, 0x40400000
; GFX10PLUS-NEXT: s_mov_b32 s37, 2.0
; GFX10PLUS-NEXT: s_movrels_b32 s0, s36
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
  ret float %ext
}

; Dynamic extractelement from the constant vector <double 1.0, ..., 16.0> with
; an inreg index. Expected code builds the 64-bit constants in s[36:67]
; (a zero in s66 is copied into the low halves) and reads the element with
; s_movrels_b64.
define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v16f64_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s66, 0
; GCN-NEXT: s_mov_b64 s[36:37], 1.0
; GCN-NEXT: s_mov_b32 m0, s2
; GCN-NEXT: s_mov_b32 s67, 0x40300000
; GCN-NEXT: s_mov_b32 s65, 0x402e0000
; GCN-NEXT: s_mov_b32 s64, s66
; GCN-NEXT: s_mov_b32 s63, 0x402c0000
; GCN-NEXT: s_mov_b32 s62, s66
; GCN-NEXT: s_mov_b32 s61, 0x402a0000
; GCN-NEXT: s_mov_b32 s60, s66
; GCN-NEXT: s_mov_b32 s59, 0x40280000
; GCN-NEXT: s_mov_b32 s58, s66
; GCN-NEXT: s_mov_b32 s57, 0x40260000
; GCN-NEXT: s_mov_b32 s56, s66
; GCN-NEXT: s_mov_b32 s55, 0x40240000
; GCN-NEXT: s_mov_b32 s54, s66
; GCN-NEXT: s_mov_b32 s53, 0x40220000
; GCN-NEXT: s_mov_b32 s52, s66
; GCN-NEXT: s_mov_b32 s51, 0x40200000
; GCN-NEXT: s_mov_b32 s50, s66
; GCN-NEXT: s_mov_b32 s49, 0x401c0000
; GCN-NEXT: s_mov_b32 s48, s66
; GCN-NEXT: s_mov_b32 s47, 0x40180000
; GCN-NEXT: s_mov_b32 s46, s66
; GCN-NEXT: s_mov_b32 s45, 0x40140000
; GCN-NEXT: s_mov_b32 s44, s66
; GCN-NEXT: s_mov_b64 s[42:43], 4.0
; GCN-NEXT: s_mov_b32 s41, 0x40080000
; GCN-NEXT: s_mov_b32 s40, s66
; GCN-NEXT: s_mov_b64 s[38:39], 2.0
; GCN-NEXT: s_movrels_b64 s[0:1], s[36:37]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v16f64_s_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_mov_b32 s66, 0
; GFX10PLUS-NEXT: s_mov_b64 s[36:37], 1.0
; GFX10PLUS-NEXT: s_mov_b32 m0, s2
; GFX10PLUS-NEXT: s_mov_b32 s67, 0x40300000
; GFX10PLUS-NEXT: s_mov_b32 s65, 0x402e0000
; GFX10PLUS-NEXT: s_mov_b32 s64, s66
; GFX10PLUS-NEXT: s_mov_b32 s63, 0x402c0000
; GFX10PLUS-NEXT: s_mov_b32 s62, s66
; GFX10PLUS-NEXT: s_mov_b32 s61, 0x402a0000
; GFX10PLUS-NEXT: s_mov_b32 s60, s66
; GFX10PLUS-NEXT: s_mov_b32 s59, 0x40280000
; GFX10PLUS-NEXT: s_mov_b32 s58, s66
; GFX10PLUS-NEXT: s_mov_b32 s57, 0x40260000
; GFX10PLUS-NEXT: s_mov_b32 s56, s66
; GFX10PLUS-NEXT: s_mov_b32 s55, 0x40240000
; GFX10PLUS-NEXT: s_mov_b32 s54, s66
; GFX10PLUS-NEXT: s_mov_b32 s53, 0x40220000
; GFX10PLUS-NEXT: s_mov_b32 s52, s66
; GFX10PLUS-NEXT: s_mov_b32 s51, 0x40200000
; GFX10PLUS-NEXT: s_mov_b32 s50, s66
; GFX10PLUS-NEXT: s_mov_b32 s49, 0x401c0000
; GFX10PLUS-NEXT: s_mov_b32 s48, s66
; GFX10PLUS-NEXT: s_mov_b32 s47, 0x40180000
; GFX10PLUS-NEXT: s_mov_b32 s46, s66
; GFX10PLUS-NEXT: s_mov_b32 s45, 0x40140000
; GFX10PLUS-NEXT: s_mov_b32 s44, s66
; GFX10PLUS-NEXT: s_mov_b64 s[42:43], 4.0
; GFX10PLUS-NEXT: s_mov_b32 s41, 0x40080000
; GFX10PLUS-NEXT: s_mov_b32 s40, s66
; GFX10PLUS-NEXT: s_mov_b64 s[38:39], 2.0
; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[36:37]
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
  ret double %ext
}

; Dynamic extractelement from an inreg <6 x float> vector with a non-inreg
; index (held in v0 in the expected code). Expected code is a
; v_cmp_eq_u32/v_cndmask_b32 chain over indices 1-5.
define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v6f32_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s3
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_mov_b32_e32 v3, s4
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: v_mov_b32_e32 v4, s5
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v5, s6
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v6, s7
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v6, vcc
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v6f32_s_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s1
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s5, vcc_lo
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <6 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extractelement from a <6 x float> vector where neither the vector
; nor the index is inreg (default calling convention). Expected code is a
; v_cmp_eq_u32/v_cndmask_b32 chain over indices 1-5 with the index in v6.
define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v6f32_v_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: dyn_extract_v6f32_v_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v6
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v6
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v6
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v6
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <6 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extractelement from a non-inreg <6 x float> vector with an inreg
; index. Expected code compares the index register s2 against 1-5 with
; v_cmp_eq_u32_e64 and selects with v_cndmask_b32.
define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v6f32_v_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v6f32_v_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <6 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extractelement from an inreg <6 x float> vector with an inreg index.
; Expected code is an s_cmp_eq_u32/s_cselect_b32 chain over indices 1-5 (index
; in s8), followed by a copy of the result into v0.
define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v6f32_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_cmp_eq_u32 s8, 1
; GCN-NEXT: s_cselect_b32 s0, s3, s2
; GCN-NEXT: s_cmp_eq_u32 s8, 2
; GCN-NEXT: s_cselect_b32 s0, s4, s0
; GCN-NEXT: s_cmp_eq_u32 s8, 3
; GCN-NEXT: s_cselect_b32 s0, s5, s0
; GCN-NEXT: s_cmp_eq_u32 s8, 4
; GCN-NEXT: s_cselect_b32 s0, s6, s0
; GCN-NEXT: s_cmp_eq_u32 s8, 5
; GCN-NEXT: s_cselect_b32 s0, s7, s0
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v6f32_s_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 1
; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2
; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 2
; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0
; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 3
; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0
; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 4
; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0
; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 5
; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <6 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extractelement from an inreg <7 x float> vector with a non-inreg
; index (held in v0 in the expected code). Expected code is a
; v_cmp_eq_u32/v_cndmask_b32 chain over indices 1-6.
define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v7f32_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s3
; GCN-NEXT: s_mov_b32 s2, s4
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_mov_b32_e32 v3, s2
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: v_mov_b32_e32 v4, s5
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v5, s6
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v6, s7
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: v_mov_b32_e32 v7, s8
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v7f32_s_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s1
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s6, vcc_lo
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <7 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extractelement from a <7 x float> vector where neither the vector
; nor the index is inreg (default calling convention). Expected code is a
; v_cmp_eq_u32/v_cndmask_b32 chain over indices 1-6 with the index in v7.
define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v7f32_v_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: dyn_extract_v7f32_v_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v7
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v7
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v7
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v7
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v7
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <7 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extractelement from a non-inreg <7 x float> vector with an inreg
; index. Expected code compares the index register s2 against 1-6 with
; v_cmp_eq_u32_e64 and selects with v_cndmask_b32.
define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v7f32_v_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v7f32_v_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10PLUS-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <7 x float> %vec, i32 %sel
  ret float %ext
}

define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v7f32_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_cmp_eq_u32 s9, 1
; GCN-NEXT: s_cselect_b32 s0, s3, s2
; GCN-NEXT: s_cmp_eq_u32 s9, 2
; GCN-NEXT: s_cselect_b32 s0, s4, s0
; GCN-NEXT: s_cmp_eq_u32 s9, 3
; GCN-NEXT: s_cselect_b32 s0, s5, s0
; GCN-NEXT: s_cmp_eq_u32 s9, 4
; GCN-NEXT: s_cselect_b32 s0, s6, s0
; GCN-NEXT: s_cmp_eq_u32 s9, 5
; GCN-NEXT: s_cselect_b32 s0, s7, s0
; GCN-NEXT: s_cmp_eq_u32 s9, 6
; GCN-NEXT: s_cselect_b32 s0, s8, s0
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v7f32_s_s:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 1
; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2
; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 2
; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0
; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 3
;
GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0 2493; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 4 2494; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0 2495; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 5 2496; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0 2497; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 6 2498; GFX10PLUS-NEXT: s_cselect_b32 s0, s8, s0 2499; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 2500; GFX10PLUS-NEXT: ; return to shader part epilog 2501entry: 2502 %ext = extractelement <7 x float> %vec, i32 %sel 2503 ret float %ext 2504} 2505 2506define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) { 2507; GCN-LABEL: dyn_extract_v6f64_s_v: 2508; GCN: ; %bb.0: ; %entry 2509; GCN-NEXT: s_mov_b32 s0, s2 2510; GCN-NEXT: s_mov_b32 s1, s3 2511; GCN-NEXT: s_mov_b32 s2, s4 2512; GCN-NEXT: s_mov_b32 s3, s5 2513; GCN-NEXT: s_mov_b32 s4, s6 2514; GCN-NEXT: s_mov_b32 s5, s7 2515; GCN-NEXT: v_mov_b32_e32 v1, s0 2516; GCN-NEXT: v_mov_b32_e32 v2, s1 2517; GCN-NEXT: v_mov_b32_e32 v3, s2 2518; GCN-NEXT: v_mov_b32_e32 v4, s3 2519; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2520; GCN-NEXT: s_mov_b32 s6, s8 2521; GCN-NEXT: s_mov_b32 s7, s9 2522; GCN-NEXT: v_mov_b32_e32 v5, s4 2523; GCN-NEXT: v_mov_b32_e32 v6, s5 2524; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2525; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2526; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 2527; GCN-NEXT: v_mov_b32_e32 v7, s6 2528; GCN-NEXT: v_mov_b32_e32 v8, s7 2529; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2530; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 2531; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 2532; GCN-NEXT: v_mov_b32_e32 v9, s10 2533; GCN-NEXT: v_mov_b32_e32 v10, s11 2534; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2535; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 2536; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 2537; GCN-NEXT: v_mov_b32_e32 v11, s12 2538; GCN-NEXT: v_mov_b32_e32 v12, s13 2539; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2540; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 2541; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2542; 
GCN-NEXT: v_cndmask_b32_e32 v0, v1, v11, vcc 2543; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v12, vcc 2544; GCN-NEXT: v_readfirstlane_b32 s0, v0 2545; GCN-NEXT: v_readfirstlane_b32 s1, v1 2546; GCN-NEXT: ; return to shader part epilog 2547; 2548; GFX10-LABEL: dyn_extract_v6f64_s_v: 2549; GFX10: ; %bb.0: ; %entry 2550; GFX10-NEXT: s_mov_b32 s0, s2 2551; GFX10-NEXT: s_mov_b32 s2, s4 2552; GFX10-NEXT: s_mov_b32 s15, s5 2553; GFX10-NEXT: v_mov_b32_e32 v1, s2 2554; GFX10-NEXT: v_mov_b32_e32 v2, s15 2555; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2556; GFX10-NEXT: s_mov_b32 s1, s3 2557; GFX10-NEXT: s_mov_b32 s4, s6 2558; GFX10-NEXT: s_mov_b32 s5, s7 2559; GFX10-NEXT: s_mov_b32 s6, s8 2560; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 2561; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo 2562; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2563; GFX10-NEXT: s_mov_b32 s7, s9 2564; GFX10-NEXT: s_mov_b32 s8, s10 2565; GFX10-NEXT: s_mov_b32 s9, s11 2566; GFX10-NEXT: s_mov_b32 s10, s12 2567; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 2568; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo 2569; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2570; GFX10-NEXT: s_mov_b32 s11, s13 2571; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2572; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2573; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2574; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2575; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2576; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2577; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s10, vcc_lo 2578; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s11, vcc_lo 2579; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2580; GFX10-NEXT: v_readfirstlane_b32 s1, v1 2581; GFX10-NEXT: ; return to shader part epilog 2582; 2583; GFX11-LABEL: dyn_extract_v6f64_s_v: 2584; GFX11: ; %bb.0: ; %entry 2585; GFX11-NEXT: s_mov_b32 s0, s2 2586; GFX11-NEXT: s_mov_b32 s2, s4 2587; GFX11-NEXT: s_mov_b32 s15, s5 2588; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, 
s15 2589; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2590; GFX11-NEXT: s_mov_b32 s1, s3 2591; GFX11-NEXT: s_mov_b32 s4, s6 2592; GFX11-NEXT: s_mov_b32 s5, s7 2593; GFX11-NEXT: s_mov_b32 s6, s8 2594; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 2595; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo 2596; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2597; GFX11-NEXT: s_mov_b32 s7, s9 2598; GFX11-NEXT: s_mov_b32 s8, s10 2599; GFX11-NEXT: s_mov_b32 s9, s11 2600; GFX11-NEXT: s_mov_b32 s10, s12 2601; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 2602; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo 2603; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2604; GFX11-NEXT: s_mov_b32 s11, s13 2605; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2606; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2607; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2608; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2609; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2610; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2611; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s10, vcc_lo 2612; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s11, vcc_lo 2613; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2614; GFX11-NEXT: v_readfirstlane_b32 s1, v1 2615; GFX11-NEXT: ; return to shader part epilog 2616entry: 2617 %ext = extractelement <6 x double> %vec, i32 %sel 2618 ret double %ext 2619} 2620 2621define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) { 2622; GCN-LABEL: dyn_extract_v6f64_v_v: 2623; GCN: ; %bb.0: ; %entry 2624; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2625; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12 2626; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2627; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2628; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12 2629; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2630; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2631; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12 2632; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 2633; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2634; 
GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12 2635; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 2636; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2637; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v12 2638; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 2639; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 2640; GCN-NEXT: s_setpc_b64 s[30:31] 2641; 2642; GFX10-LABEL: dyn_extract_v6f64_v_v: 2643; GFX10: ; %bb.0: ; %entry 2644; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2645; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2646; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 2647; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2648; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2649; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 2650; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2651; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 2652; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12 2653; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 2654; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 2655; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12 2656; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 2657; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 2658; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12 2659; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 2660; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 2661; GFX10-NEXT: s_setpc_b64 s[30:31] 2662; 2663; GFX11-LABEL: dyn_extract_v6f64_v_v: 2664; GFX11: ; %bb.0: ; %entry 2665; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2666; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2667; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 2668; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 2669; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 2670; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 2671; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12 2672; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7 2673; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12 2674; GFX11-NEXT: 
v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9 2675; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12 2676; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11 2677; GFX11-NEXT: s_setpc_b64 s[30:31] 2678entry: 2679 %ext = extractelement <6 x double> %vec, i32 %sel 2680 ret double %ext 2681} 2682 2683define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) { 2684; GPRIDX-LABEL: dyn_extract_v6f64_v_s: 2685; GPRIDX: ; %bb.0: ; %entry 2686; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 2687; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) 2688; GPRIDX-NEXT: v_mov_b32_e32 v12, v0 2689; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 2690; GPRIDX-NEXT: s_set_gpr_idx_off 2691; GPRIDX-NEXT: v_readfirstlane_b32 s0, v12 2692; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 2693; GPRIDX-NEXT: ; return to shader part epilog 2694; 2695; MOVREL-LABEL: dyn_extract_v6f64_v_s: 2696; MOVREL: ; %bb.0: ; %entry 2697; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 2698; MOVREL-NEXT: v_movrels_b32_e32 v12, v0 2699; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 2700; MOVREL-NEXT: v_readfirstlane_b32 s0, v12 2701; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 2702; MOVREL-NEXT: ; return to shader part epilog 2703; 2704; GFX10PLUS-LABEL: dyn_extract_v6f64_v_s: 2705; GFX10PLUS: ; %bb.0: ; %entry 2706; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1 2707; GFX10PLUS-NEXT: v_movrels_b32_e32 v12, v0 2708; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1 2709; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v12 2710; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0 2711; GFX10PLUS-NEXT: ; return to shader part epilog 2712entry: 2713 %ext = extractelement <6 x double> %vec, i32 %sel 2714 ret double %ext 2715} 2716 2717define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) { 2718; GCN-LABEL: dyn_extract_v6f64_s_s: 2719; GCN: ; %bb.0: ; %entry 2720; GCN-NEXT: s_mov_b32 s0, s2 2721; GCN-NEXT: s_mov_b32 s1, s3 2722; GCN-NEXT: s_mov_b32 m0, s14 2723; GCN-NEXT: s_mov_b32 s2, s4 2724; GCN-NEXT: 
s_mov_b32 s3, s5 2725; GCN-NEXT: s_mov_b32 s4, s6 2726; GCN-NEXT: s_mov_b32 s5, s7 2727; GCN-NEXT: s_mov_b32 s6, s8 2728; GCN-NEXT: s_mov_b32 s7, s9 2729; GCN-NEXT: s_mov_b32 s8, s10 2730; GCN-NEXT: s_mov_b32 s9, s11 2731; GCN-NEXT: s_mov_b32 s10, s12 2732; GCN-NEXT: s_mov_b32 s11, s13 2733; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] 2734; GCN-NEXT: ; return to shader part epilog 2735; 2736; GFX10PLUS-LABEL: dyn_extract_v6f64_s_s: 2737; GFX10PLUS: ; %bb.0: ; %entry 2738; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2739; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2740; GFX10PLUS-NEXT: s_mov_b32 m0, s14 2741; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2742; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2743; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2744; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2745; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2746; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2747; GFX10PLUS-NEXT: s_mov_b32 s8, s10 2748; GFX10PLUS-NEXT: s_mov_b32 s9, s11 2749; GFX10PLUS-NEXT: s_mov_b32 s10, s12 2750; GFX10PLUS-NEXT: s_mov_b32 s11, s13 2751; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1] 2752; GFX10PLUS-NEXT: ; return to shader part epilog 2753entry: 2754 %ext = extractelement <6 x double> %vec, i32 %sel 2755 ret double %ext 2756} 2757 2758define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) { 2759; GCN-LABEL: dyn_extract_v7f64_s_v: 2760; GCN: ; %bb.0: ; %entry 2761; GCN-NEXT: s_mov_b32 s0, s2 2762; GCN-NEXT: s_mov_b32 s1, s3 2763; GCN-NEXT: s_mov_b32 s2, s4 2764; GCN-NEXT: s_mov_b32 s3, s5 2765; GCN-NEXT: s_mov_b32 s4, s6 2766; GCN-NEXT: s_mov_b32 s5, s7 2767; GCN-NEXT: v_mov_b32_e32 v1, s0 2768; GCN-NEXT: v_mov_b32_e32 v2, s1 2769; GCN-NEXT: v_mov_b32_e32 v3, s2 2770; GCN-NEXT: v_mov_b32_e32 v4, s3 2771; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2772; GCN-NEXT: s_mov_b32 s6, s8 2773; GCN-NEXT: s_mov_b32 s7, s9 2774; GCN-NEXT: v_mov_b32_e32 v5, s4 2775; GCN-NEXT: v_mov_b32_e32 v6, s5 2776; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2777; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2778; GCN-NEXT: 
v_cmp_eq_u32_e32 vcc, 2, v0 2779; GCN-NEXT: s_mov_b32 s8, s10 2780; GCN-NEXT: s_mov_b32 s9, s11 2781; GCN-NEXT: v_mov_b32_e32 v7, s6 2782; GCN-NEXT: v_mov_b32_e32 v8, s7 2783; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2784; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 2785; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 2786; GCN-NEXT: v_mov_b32_e32 v9, s8 2787; GCN-NEXT: v_mov_b32_e32 v10, s9 2788; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2789; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 2790; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 2791; GCN-NEXT: v_mov_b32_e32 v11, s12 2792; GCN-NEXT: v_mov_b32_e32 v12, s13 2793; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2794; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 2795; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2796; GCN-NEXT: v_mov_b32_e32 v13, s14 2797; GCN-NEXT: v_mov_b32_e32 v14, s15 2798; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 2799; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc 2800; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 2801; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v13, vcc 2802; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v14, vcc 2803; GCN-NEXT: v_readfirstlane_b32 s0, v0 2804; GCN-NEXT: v_readfirstlane_b32 s1, v1 2805; GCN-NEXT: ; return to shader part epilog 2806; 2807; GFX10-LABEL: dyn_extract_v7f64_s_v: 2808; GFX10: ; %bb.0: ; %entry 2809; GFX10-NEXT: s_mov_b32 s0, s2 2810; GFX10-NEXT: s_mov_b32 s2, s4 2811; GFX10-NEXT: s_mov_b32 s19, s5 2812; GFX10-NEXT: v_mov_b32_e32 v1, s2 2813; GFX10-NEXT: v_mov_b32_e32 v2, s19 2814; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2815; GFX10-NEXT: s_mov_b32 s1, s3 2816; GFX10-NEXT: s_mov_b32 s4, s6 2817; GFX10-NEXT: s_mov_b32 s5, s7 2818; GFX10-NEXT: s_mov_b32 s6, s8 2819; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 2820; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo 2821; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2822; GFX10-NEXT: s_mov_b32 s7, s9 2823; GFX10-NEXT: s_mov_b32 s8, s10 2824; GFX10-NEXT: s_mov_b32 s9, s11 2825; GFX10-NEXT: s_mov_b32 s10, s12 2826; GFX10-NEXT: 
v_cndmask_b32_e64 v1, v1, s4, vcc_lo 2827; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo 2828; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2829; GFX10-NEXT: s_mov_b32 s11, s13 2830; GFX10-NEXT: s_mov_b32 s12, s14 2831; GFX10-NEXT: s_mov_b32 s13, s15 2832; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2833; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2834; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2835; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2836; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2837; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2838; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 2839; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo 2840; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 2841; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo 2842; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo 2843; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2844; GFX10-NEXT: v_readfirstlane_b32 s1, v1 2845; GFX10-NEXT: ; return to shader part epilog 2846; 2847; GFX11-LABEL: dyn_extract_v7f64_s_v: 2848; GFX11: ; %bb.0: ; %entry 2849; GFX11-NEXT: s_mov_b32 s0, s2 2850; GFX11-NEXT: s_mov_b32 s2, s4 2851; GFX11-NEXT: s_mov_b32 s19, s5 2852; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s19 2853; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2854; GFX11-NEXT: s_mov_b32 s1, s3 2855; GFX11-NEXT: s_mov_b32 s4, s6 2856; GFX11-NEXT: s_mov_b32 s5, s7 2857; GFX11-NEXT: s_mov_b32 s6, s8 2858; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 2859; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo 2860; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2861; GFX11-NEXT: s_mov_b32 s7, s9 2862; GFX11-NEXT: s_mov_b32 s8, s10 2863; GFX11-NEXT: s_mov_b32 s9, s11 2864; GFX11-NEXT: s_mov_b32 s10, s12 2865; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 2866; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo 2867; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2868; GFX11-NEXT: s_mov_b32 s11, s13 2869; GFX11-NEXT: s_mov_b32 s12, s14 2870; GFX11-NEXT: s_mov_b32 s13, s15 
2871; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2872; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2873; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2874; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2875; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2876; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2877; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 2878; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo 2879; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 2880; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo 2881; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo 2882; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2883; GFX11-NEXT: v_readfirstlane_b32 s1, v1 2884; GFX11-NEXT: ; return to shader part epilog 2885entry: 2886 %ext = extractelement <7 x double> %vec, i32 %sel 2887 ret double %ext 2888} 2889 2890define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) { 2891; GCN-LABEL: dyn_extract_v7f64_v_v: 2892; GCN: ; %bb.0: ; %entry 2893; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2894; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v14 2895; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2896; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2897; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v14 2898; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2899; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2900; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v14 2901; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 2902; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2903; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v14 2904; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 2905; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2906; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v14 2907; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 2908; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 2909; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v14 2910; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 2911; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 2912; GCN-NEXT: s_setpc_b64 s[30:31] 2913; 2914; GFX10-LABEL: dyn_extract_v7f64_v_v: 2915; GFX10: ; 
%bb.0: ; %entry 2916; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2917; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2918; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 2919; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2920; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2921; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14 2922; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2923; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 2924; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14 2925; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 2926; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 2927; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14 2928; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 2929; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 2930; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14 2931; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 2932; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 2933; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14 2934; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 2935; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 2936; GFX10-NEXT: s_setpc_b64 s[30:31] 2937; 2938; GFX11-LABEL: dyn_extract_v7f64_v_v: 2939; GFX11: ; %bb.0: ; %entry 2940; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2941; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2942; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 2943; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 2944; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14 2945; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 2946; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14 2947; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7 2948; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14 2949; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9 2950; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14 2951; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11 2952; GFX11-NEXT: v_cmp_eq_u32_e32 
vcc_lo, 6, v14 2953; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13 2954; GFX11-NEXT: s_setpc_b64 s[30:31] 2955entry: 2956 %ext = extractelement <7 x double> %vec, i32 %sel 2957 ret double %ext 2958} 2959 2960define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) { 2961; GPRIDX-LABEL: dyn_extract_v7f64_v_s: 2962; GPRIDX: ; %bb.0: ; %entry 2963; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 2964; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) 2965; GPRIDX-NEXT: v_mov_b32_e32 v14, v0 2966; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 2967; GPRIDX-NEXT: s_set_gpr_idx_off 2968; GPRIDX-NEXT: v_readfirstlane_b32 s0, v14 2969; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 2970; GPRIDX-NEXT: ; return to shader part epilog 2971; 2972; MOVREL-LABEL: dyn_extract_v7f64_v_s: 2973; MOVREL: ; %bb.0: ; %entry 2974; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 2975; MOVREL-NEXT: v_movrels_b32_e32 v14, v0 2976; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 2977; MOVREL-NEXT: v_readfirstlane_b32 s0, v14 2978; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 2979; MOVREL-NEXT: ; return to shader part epilog 2980; 2981; GFX10PLUS-LABEL: dyn_extract_v7f64_v_s: 2982; GFX10PLUS: ; %bb.0: ; %entry 2983; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1 2984; GFX10PLUS-NEXT: v_movrels_b32_e32 v14, v0 2985; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1 2986; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v14 2987; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0 2988; GFX10PLUS-NEXT: ; return to shader part epilog 2989entry: 2990 %ext = extractelement <7 x double> %vec, i32 %sel 2991 ret double %ext 2992} 2993 2994define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) { 2995; GCN-LABEL: dyn_extract_v7f64_s_s: 2996; GCN: ; %bb.0: ; %entry 2997; GCN-NEXT: s_mov_b32 s0, s2 2998; GCN-NEXT: s_mov_b32 s1, s3 2999; GCN-NEXT: s_mov_b32 m0, s16 3000; GCN-NEXT: s_mov_b32 s2, s4 3001; GCN-NEXT: s_mov_b32 s3, s5 3002; GCN-NEXT: s_mov_b32 s4, s6 3003; GCN-NEXT: s_mov_b32 s5, s7 3004; GCN-NEXT: 
s_mov_b32 s6, s8 3005; GCN-NEXT: s_mov_b32 s7, s9 3006; GCN-NEXT: s_mov_b32 s8, s10 3007; GCN-NEXT: s_mov_b32 s9, s11 3008; GCN-NEXT: s_mov_b32 s10, s12 3009; GCN-NEXT: s_mov_b32 s11, s13 3010; GCN-NEXT: s_mov_b32 s12, s14 3011; GCN-NEXT: s_mov_b32 s13, s15 3012; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] 3013; GCN-NEXT: ; return to shader part epilog 3014; 3015; GFX10PLUS-LABEL: dyn_extract_v7f64_s_s: 3016; GFX10PLUS: ; %bb.0: ; %entry 3017; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3018; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3019; GFX10PLUS-NEXT: s_mov_b32 m0, s16 3020; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3021; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3022; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3023; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3024; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3025; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3026; GFX10PLUS-NEXT: s_mov_b32 s8, s10 3027; GFX10PLUS-NEXT: s_mov_b32 s9, s11 3028; GFX10PLUS-NEXT: s_mov_b32 s10, s12 3029; GFX10PLUS-NEXT: s_mov_b32 s11, s13 3030; GFX10PLUS-NEXT: s_mov_b32 s12, s14 3031; GFX10PLUS-NEXT: s_mov_b32 s13, s15 3032; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1] 3033; GFX10PLUS-NEXT: ; return to shader part epilog 3034entry: 3035 %ext = extractelement <7 x double> %vec, i32 %sel 3036 ret double %ext 3037} 3038 3039define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 %sel) { 3040; GPRIDX-LABEL: dyn_extract_v5f64_s_s: 3041; GPRIDX: .amd_kernel_code_t 3042; GPRIDX-NEXT: amd_code_version_major = 1 3043; GPRIDX-NEXT: amd_code_version_minor = 2 3044; GPRIDX-NEXT: amd_machine_kind = 1 3045; GPRIDX-NEXT: amd_machine_version_major = 9 3046; GPRIDX-NEXT: amd_machine_version_minor = 0 3047; GPRIDX-NEXT: amd_machine_version_stepping = 0 3048; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 3049; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 3050; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 3051; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1 3052; GPRIDX-NEXT: priority = 0 3053; GPRIDX-NEXT: float_mode = 240 3054; GPRIDX-NEXT: priv = 0 
3055; GPRIDX-NEXT: enable_dx10_clamp = 1 3056; GPRIDX-NEXT: debug_mode = 0 3057; GPRIDX-NEXT: enable_ieee_mode = 1 3058; GPRIDX-NEXT: enable_wgp_mode = 0 3059; GPRIDX-NEXT: enable_mem_ordered = 0 3060; GPRIDX-NEXT: enable_fwd_progress = 0 3061; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3062; GPRIDX-NEXT: user_sgpr_count = 6 3063; GPRIDX-NEXT: enable_trap_handler = 0 3064; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 3065; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 3066; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 3067; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 3068; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 3069; GPRIDX-NEXT: enable_exception_msb = 0 3070; GPRIDX-NEXT: granulated_lds_size = 0 3071; GPRIDX-NEXT: enable_exception = 0 3072; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 3073; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 3074; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 3075; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3076; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 3077; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 3078; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 3079; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3080; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3081; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3082; GPRIDX-NEXT: enable_wavefront_size32 = 0 3083; GPRIDX-NEXT: enable_ordered_append_gds = 0 3084; GPRIDX-NEXT: private_element_size = 1 3085; GPRIDX-NEXT: is_ptr64 = 1 3086; GPRIDX-NEXT: is_dynamic_callstack = 0 3087; GPRIDX-NEXT: is_debug_enabled = 0 3088; GPRIDX-NEXT: is_xnack_enabled = 1 3089; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 3090; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 3091; GPRIDX-NEXT: gds_segment_byte_size = 0 3092; GPRIDX-NEXT: kernarg_segment_byte_size = 12 3093; GPRIDX-NEXT: workgroup_fbarrier_count = 0 3094; GPRIDX-NEXT: wavefront_sgpr_count = 9 3095; GPRIDX-NEXT: workitem_vgpr_count = 3 3096; GPRIDX-NEXT: reserved_vgpr_first = 0 3097; 
GPRIDX-NEXT: reserved_vgpr_count = 0 3098; GPRIDX-NEXT: reserved_sgpr_first = 0 3099; GPRIDX-NEXT: reserved_sgpr_count = 0 3100; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3101; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 3102; GPRIDX-NEXT: kernarg_segment_alignment = 4 3103; GPRIDX-NEXT: group_segment_alignment = 4 3104; GPRIDX-NEXT: private_segment_alignment = 4 3105; GPRIDX-NEXT: wavefront_size = 6 3106; GPRIDX-NEXT: call_convention = -1 3107; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 3108; GPRIDX-NEXT: .end_amd_kernel_code_t 3109; GPRIDX-NEXT: ; %bb.0: ; %entry 3110; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3111; GPRIDX-NEXT: s_load_dword s8, s[4:5], 0x8 3112; GPRIDX-NEXT: s_mov_b32 s2, 0 3113; GPRIDX-NEXT: s_mov_b32 s3, 0x40140000 3114; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000 3115; GPRIDX-NEXT: s_mov_b32 s4, s2 3116; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 3117; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1 3118; GPRIDX-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 3119; GPRIDX-NEXT: s_cmp_eq_u32 s8, 2 3120; GPRIDX-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 3121; GPRIDX-NEXT: s_cmp_eq_u32 s8, 3 3122; GPRIDX-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] 3123; GPRIDX-NEXT: s_cmp_eq_u32 s8, 4 3124; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3125; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 3126; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 3127; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 3128; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 3129; GPRIDX-NEXT: s_endpgm 3130; 3131; MOVREL-LABEL: dyn_extract_v5f64_s_s: 3132; MOVREL: .amd_kernel_code_t 3133; MOVREL-NEXT: amd_code_version_major = 1 3134; MOVREL-NEXT: amd_code_version_minor = 2 3135; MOVREL-NEXT: amd_machine_kind = 1 3136; MOVREL-NEXT: amd_machine_version_major = 8 3137; MOVREL-NEXT: amd_machine_version_minor = 0 3138; MOVREL-NEXT: amd_machine_version_stepping = 3 3139; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 3140; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 3141; MOVREL-NEXT: granulated_workitem_vgpr_count = 
0 3142; MOVREL-NEXT: granulated_wavefront_sgpr_count = 1 3143; MOVREL-NEXT: priority = 0 3144; MOVREL-NEXT: float_mode = 240 3145; MOVREL-NEXT: priv = 0 3146; MOVREL-NEXT: enable_dx10_clamp = 1 3147; MOVREL-NEXT: debug_mode = 0 3148; MOVREL-NEXT: enable_ieee_mode = 1 3149; MOVREL-NEXT: enable_wgp_mode = 0 3150; MOVREL-NEXT: enable_mem_ordered = 0 3151; MOVREL-NEXT: enable_fwd_progress = 0 3152; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3153; MOVREL-NEXT: user_sgpr_count = 6 3154; MOVREL-NEXT: enable_trap_handler = 0 3155; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 3156; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 3157; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 3158; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 3159; MOVREL-NEXT: enable_vgpr_workitem_id = 0 3160; MOVREL-NEXT: enable_exception_msb = 0 3161; MOVREL-NEXT: granulated_lds_size = 0 3162; MOVREL-NEXT: enable_exception = 0 3163; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 3164; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 3165; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 3166; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3167; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 3168; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 3169; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 3170; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3171; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3172; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3173; MOVREL-NEXT: enable_wavefront_size32 = 0 3174; MOVREL-NEXT: enable_ordered_append_gds = 0 3175; MOVREL-NEXT: private_element_size = 1 3176; MOVREL-NEXT: is_ptr64 = 1 3177; MOVREL-NEXT: is_dynamic_callstack = 0 3178; MOVREL-NEXT: is_debug_enabled = 0 3179; MOVREL-NEXT: is_xnack_enabled = 0 3180; MOVREL-NEXT: workitem_private_segment_byte_size = 0 3181; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 3182; MOVREL-NEXT: gds_segment_byte_size = 0 3183; MOVREL-NEXT: kernarg_segment_byte_size = 12 3184; MOVREL-NEXT: 
workgroup_fbarrier_count = 0 3185; MOVREL-NEXT: wavefront_sgpr_count = 9 3186; MOVREL-NEXT: workitem_vgpr_count = 4 3187; MOVREL-NEXT: reserved_vgpr_first = 0 3188; MOVREL-NEXT: reserved_vgpr_count = 0 3189; MOVREL-NEXT: reserved_sgpr_first = 0 3190; MOVREL-NEXT: reserved_sgpr_count = 0 3191; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3192; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 3193; MOVREL-NEXT: kernarg_segment_alignment = 4 3194; MOVREL-NEXT: group_segment_alignment = 4 3195; MOVREL-NEXT: private_segment_alignment = 4 3196; MOVREL-NEXT: wavefront_size = 6 3197; MOVREL-NEXT: call_convention = -1 3198; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 3199; MOVREL-NEXT: .end_amd_kernel_code_t 3200; MOVREL-NEXT: ; %bb.0: ; %entry 3201; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3202; MOVREL-NEXT: s_load_dword s8, s[4:5], 0x8 3203; MOVREL-NEXT: s_mov_b32 s2, 0 3204; MOVREL-NEXT: s_mov_b32 s3, 0x40140000 3205; MOVREL-NEXT: s_mov_b32 s5, 0x40080000 3206; MOVREL-NEXT: s_mov_b32 s4, s2 3207; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 3208; MOVREL-NEXT: s_cmp_eq_u32 s8, 1 3209; MOVREL-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 3210; MOVREL-NEXT: s_cmp_eq_u32 s8, 2 3211; MOVREL-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 3212; MOVREL-NEXT: s_cmp_eq_u32 s8, 3 3213; MOVREL-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] 3214; MOVREL-NEXT: s_cmp_eq_u32 s8, 4 3215; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3216; MOVREL-NEXT: v_mov_b32_e32 v0, s2 3217; MOVREL-NEXT: v_mov_b32_e32 v3, s1 3218; MOVREL-NEXT: v_mov_b32_e32 v1, s3 3219; MOVREL-NEXT: v_mov_b32_e32 v2, s0 3220; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 3221; MOVREL-NEXT: s_endpgm 3222; 3223; GFX10-LABEL: dyn_extract_v5f64_s_s: 3224; GFX10: .amd_kernel_code_t 3225; GFX10-NEXT: amd_code_version_major = 1 3226; GFX10-NEXT: amd_code_version_minor = 2 3227; GFX10-NEXT: amd_machine_kind = 1 3228; GFX10-NEXT: amd_machine_version_major = 10 3229; GFX10-NEXT: amd_machine_version_minor = 1 3230; GFX10-NEXT: 
amd_machine_version_stepping = 0 3231; GFX10-NEXT: kernel_code_entry_byte_offset = 256 3232; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 3233; GFX10-NEXT: granulated_workitem_vgpr_count = 0 3234; GFX10-NEXT: granulated_wavefront_sgpr_count = 1 3235; GFX10-NEXT: priority = 0 3236; GFX10-NEXT: float_mode = 240 3237; GFX10-NEXT: priv = 0 3238; GFX10-NEXT: enable_dx10_clamp = 1 3239; GFX10-NEXT: debug_mode = 0 3240; GFX10-NEXT: enable_ieee_mode = 1 3241; GFX10-NEXT: enable_wgp_mode = 1 3242; GFX10-NEXT: enable_mem_ordered = 1 3243; GFX10-NEXT: enable_fwd_progress = 0 3244; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3245; GFX10-NEXT: user_sgpr_count = 6 3246; GFX10-NEXT: enable_trap_handler = 0 3247; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 3248; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0 3249; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0 3250; GFX10-NEXT: enable_sgpr_workgroup_info = 0 3251; GFX10-NEXT: enable_vgpr_workitem_id = 0 3252; GFX10-NEXT: enable_exception_msb = 0 3253; GFX10-NEXT: granulated_lds_size = 0 3254; GFX10-NEXT: enable_exception = 0 3255; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 3256; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0 3257; GFX10-NEXT: enable_sgpr_queue_ptr = 0 3258; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3259; GFX10-NEXT: enable_sgpr_dispatch_id = 0 3260; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 3261; GFX10-NEXT: enable_sgpr_private_segment_size = 0 3262; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3263; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3264; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3265; GFX10-NEXT: enable_wavefront_size32 = 1 3266; GFX10-NEXT: enable_ordered_append_gds = 0 3267; GFX10-NEXT: private_element_size = 1 3268; GFX10-NEXT: is_ptr64 = 1 3269; GFX10-NEXT: is_dynamic_callstack = 0 3270; GFX10-NEXT: is_debug_enabled = 0 3271; GFX10-NEXT: is_xnack_enabled = 1 3272; GFX10-NEXT: workitem_private_segment_byte_size = 0 3273; GFX10-NEXT: 
workgroup_group_segment_byte_size = 0 3274; GFX10-NEXT: gds_segment_byte_size = 0 3275; GFX10-NEXT: kernarg_segment_byte_size = 12 3276; GFX10-NEXT: workgroup_fbarrier_count = 0 3277; GFX10-NEXT: wavefront_sgpr_count = 9 3278; GFX10-NEXT: workitem_vgpr_count = 3 3279; GFX10-NEXT: reserved_vgpr_first = 0 3280; GFX10-NEXT: reserved_vgpr_count = 0 3281; GFX10-NEXT: reserved_sgpr_first = 0 3282; GFX10-NEXT: reserved_sgpr_count = 0 3283; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3284; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 3285; GFX10-NEXT: kernarg_segment_alignment = 4 3286; GFX10-NEXT: group_segment_alignment = 4 3287; GFX10-NEXT: private_segment_alignment = 4 3288; GFX10-NEXT: wavefront_size = 5 3289; GFX10-NEXT: call_convention = -1 3290; GFX10-NEXT: runtime_loader_kernel_symbol = 0 3291; GFX10-NEXT: .end_amd_kernel_code_t 3292; GFX10-NEXT: ; %bb.0: ; %entry 3293; GFX10-NEXT: s_clause 0x1 3294; GFX10-NEXT: s_load_dword s8, s[4:5], 0x8 3295; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3296; GFX10-NEXT: s_mov_b32 s2, 0 3297; GFX10-NEXT: s_mov_b32 s3, 0x40140000 3298; GFX10-NEXT: s_mov_b32 s5, 0x40080000 3299; GFX10-NEXT: s_mov_b32 s4, s2 3300; GFX10-NEXT: v_mov_b32_e32 v2, 0 3301; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3302; GFX10-NEXT: s_cmp_eq_u32 s8, 1 3303; GFX10-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 3304; GFX10-NEXT: s_cmp_eq_u32 s8, 2 3305; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 3306; GFX10-NEXT: s_cmp_eq_u32 s8, 3 3307; GFX10-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] 3308; GFX10-NEXT: s_cmp_eq_u32 s8, 4 3309; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3310; GFX10-NEXT: v_mov_b32_e32 v0, s2 3311; GFX10-NEXT: v_mov_b32_e32 v1, s3 3312; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 3313; GFX10-NEXT: s_endpgm 3314; 3315; GFX11-LABEL: dyn_extract_v5f64_s_s: 3316; GFX11: .amd_kernel_code_t 3317; GFX11-NEXT: amd_code_version_major = 1 3318; GFX11-NEXT: amd_code_version_minor = 2 3319; GFX11-NEXT: amd_machine_kind = 1 3320; 
GFX11-NEXT: amd_machine_version_major = 11 3321; GFX11-NEXT: amd_machine_version_minor = 0 3322; GFX11-NEXT: amd_machine_version_stepping = 0 3323; GFX11-NEXT: kernel_code_entry_byte_offset = 256 3324; GFX11-NEXT: kernel_code_prefetch_byte_size = 0 3325; GFX11-NEXT: granulated_workitem_vgpr_count = 0 3326; GFX11-NEXT: granulated_wavefront_sgpr_count = 1 3327; GFX11-NEXT: priority = 0 3328; GFX11-NEXT: float_mode = 240 3329; GFX11-NEXT: priv = 0 3330; GFX11-NEXT: enable_dx10_clamp = 1 3331; GFX11-NEXT: debug_mode = 0 3332; GFX11-NEXT: enable_ieee_mode = 1 3333; GFX11-NEXT: enable_wgp_mode = 1 3334; GFX11-NEXT: enable_mem_ordered = 1 3335; GFX11-NEXT: enable_fwd_progress = 0 3336; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3337; GFX11-NEXT: user_sgpr_count = 15 3338; GFX11-NEXT: enable_trap_handler = 0 3339; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1 3340; GFX11-NEXT: enable_sgpr_workgroup_id_y = 0 3341; GFX11-NEXT: enable_sgpr_workgroup_id_z = 0 3342; GFX11-NEXT: enable_sgpr_workgroup_info = 0 3343; GFX11-NEXT: enable_vgpr_workitem_id = 0 3344; GFX11-NEXT: enable_exception_msb = 0 3345; GFX11-NEXT: granulated_lds_size = 0 3346; GFX11-NEXT: enable_exception = 0 3347; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0 3348; GFX11-NEXT: enable_sgpr_dispatch_ptr = 0 3349; GFX11-NEXT: enable_sgpr_queue_ptr = 0 3350; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3351; GFX11-NEXT: enable_sgpr_dispatch_id = 0 3352; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0 3353; GFX11-NEXT: enable_sgpr_private_segment_size = 0 3354; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3355; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3356; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3357; GFX11-NEXT: enable_wavefront_size32 = 1 3358; GFX11-NEXT: enable_ordered_append_gds = 0 3359; GFX11-NEXT: private_element_size = 1 3360; GFX11-NEXT: is_ptr64 = 1 3361; GFX11-NEXT: is_dynamic_callstack = 0 3362; GFX11-NEXT: is_debug_enabled = 0 3363; GFX11-NEXT: 
is_xnack_enabled = 0 3364; GFX11-NEXT: workitem_private_segment_byte_size = 0 3365; GFX11-NEXT: workgroup_group_segment_byte_size = 0 3366; GFX11-NEXT: gds_segment_byte_size = 0 3367; GFX11-NEXT: kernarg_segment_byte_size = 12 3368; GFX11-NEXT: workgroup_fbarrier_count = 0 3369; GFX11-NEXT: wavefront_sgpr_count = 9 3370; GFX11-NEXT: workitem_vgpr_count = 3 3371; GFX11-NEXT: reserved_vgpr_first = 0 3372; GFX11-NEXT: reserved_vgpr_count = 0 3373; GFX11-NEXT: reserved_sgpr_first = 0 3374; GFX11-NEXT: reserved_sgpr_count = 0 3375; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3376; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0 3377; GFX11-NEXT: kernarg_segment_alignment = 4 3378; GFX11-NEXT: group_segment_alignment = 4 3379; GFX11-NEXT: private_segment_alignment = 4 3380; GFX11-NEXT: wavefront_size = 5 3381; GFX11-NEXT: call_convention = -1 3382; GFX11-NEXT: runtime_loader_kernel_symbol = 0 3383; GFX11-NEXT: .end_amd_kernel_code_t 3384; GFX11-NEXT: ; %bb.0: ; %entry 3385; GFX11-NEXT: s_clause 0x1 3386; GFX11-NEXT: s_load_b32 s8, s[0:1], 0x8 3387; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 3388; GFX11-NEXT: s_mov_b32 s2, 0 3389; GFX11-NEXT: s_mov_b32 s3, 0x40140000 3390; GFX11-NEXT: s_mov_b32 s5, 0x40080000 3391; GFX11-NEXT: s_mov_b32 s4, s2 3392; GFX11-NEXT: v_mov_b32_e32 v2, 0 3393; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3394; GFX11-NEXT: s_cmp_eq_u32 s8, 1 3395; GFX11-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 3396; GFX11-NEXT: s_cmp_eq_u32 s8, 2 3397; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 3398; GFX11-NEXT: s_cmp_eq_u32 s8, 3 3399; GFX11-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] 3400; GFX11-NEXT: s_cmp_eq_u32 s8, 4 3401; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3402; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 3403; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 3404; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 3405; GFX11-NEXT: s_endpgm 3406entry: 3407 %ext = extractelement <5 x double> <double 1.0, double 2.0, double 3.0, 
double 4.0, double 5.0>, i32 %sel 3408 store double %ext, double addrspace(1)* %out 3409 ret void 3410} 3411 3412define float @dyn_extract_v15f32_const_s_v(i32 %sel) { 3413; GCN-LABEL: dyn_extract_v15f32_const_s_v: 3414; GCN: ; %bb.0: ; %entry 3415; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3416; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3417; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000 3418; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc 3419; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 3420; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc 3421; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 3422; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 3423; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc 3424; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 3425; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 3426; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3427; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 3428; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000 3429; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 3430; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 3431; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000 3432; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 3433; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 3434; GCN-NEXT: v_mov_b32_e32 v6, 0x41100000 3435; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 3436; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 3437; GCN-NEXT: v_mov_b32_e32 v7, 0x41200000 3438; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 3439; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 3440; GCN-NEXT: v_mov_b32_e32 v8, 0x41300000 3441; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 3442; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 3443; GCN-NEXT: v_mov_b32_e32 v9, 0x41400000 3444; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 3445; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 3446; GCN-NEXT: v_mov_b32_e32 v10, 0x41500000 3447; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 3448; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 3449; GCN-NEXT: v_mov_b32_e32 v11, 0x41600000 3450; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 3451; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 3452; GCN-NEXT: 
v_mov_b32_e32 v12, 0x41700000 3453; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 3454; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 3455; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v12, vcc 3456; GCN-NEXT: s_setpc_b64 s[30:31] 3457; 3458; GFX10PLUS-LABEL: dyn_extract_v15f32_const_s_v: 3459; GFX10PLUS: ; %bb.0: ; %entry 3460; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3461; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 3462; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3463; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo 3464; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 3465; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo 3466; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 3467; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo 3468; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 3469; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo 3470; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 3471; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo 3472; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 3473; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo 3474; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 3475; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo 3476; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 3477; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo 3478; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 3479; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo 3480; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 3481; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo 3482; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 3483; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo 3484; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 3485; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo 3486; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 3487; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo 3488; GFX10PLUS-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 14, v0 3489; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, 0x41700000, vcc_lo 3490; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 3491entry: 3492 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel 3493 ret float %ext 3494} 3495 3496define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) { 3497; GCN-LABEL: dyn_extract_v15f32_const_s_s: 3498; GCN: ; %bb.0: ; %entry 3499; GCN-NEXT: s_mov_b32 s4, 1.0 3500; GCN-NEXT: s_mov_b32 m0, s2 3501; GCN-NEXT: s_mov_b32 s18, 0x41700000 3502; GCN-NEXT: s_mov_b32 s17, 0x41600000 3503; GCN-NEXT: s_mov_b32 s16, 0x41500000 3504; GCN-NEXT: s_mov_b32 s15, 0x41400000 3505; GCN-NEXT: s_mov_b32 s14, 0x41300000 3506; GCN-NEXT: s_mov_b32 s13, 0x41200000 3507; GCN-NEXT: s_mov_b32 s12, 0x41100000 3508; GCN-NEXT: s_mov_b32 s11, 0x41000000 3509; GCN-NEXT: s_mov_b32 s10, 0x40e00000 3510; GCN-NEXT: s_mov_b32 s9, 0x40c00000 3511; GCN-NEXT: s_mov_b32 s8, 0x40a00000 3512; GCN-NEXT: s_mov_b32 s7, 4.0 3513; GCN-NEXT: s_mov_b32 s6, 0x40400000 3514; GCN-NEXT: s_mov_b32 s5, 2.0 3515; GCN-NEXT: s_movrels_b32 s0, s4 3516; GCN-NEXT: v_mov_b32_e32 v0, s0 3517; GCN-NEXT: ; return to shader part epilog 3518; 3519; GFX10PLUS-LABEL: dyn_extract_v15f32_const_s_s: 3520; GFX10PLUS: ; %bb.0: ; %entry 3521; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0 3522; GFX10PLUS-NEXT: s_mov_b32 m0, s2 3523; GFX10PLUS-NEXT: s_mov_b32 s18, 0x41700000 3524; GFX10PLUS-NEXT: s_mov_b32 s17, 0x41600000 3525; GFX10PLUS-NEXT: s_mov_b32 s16, 0x41500000 3526; GFX10PLUS-NEXT: s_mov_b32 s15, 0x41400000 3527; GFX10PLUS-NEXT: s_mov_b32 s14, 0x41300000 3528; GFX10PLUS-NEXT: s_mov_b32 s13, 0x41200000 3529; GFX10PLUS-NEXT: s_mov_b32 s12, 0x41100000 3530; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000 3531; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000 3532; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000 3533; GFX10PLUS-NEXT: s_mov_b32 
s8, 0x40a00000 3534; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0 3535; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000 3536; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0 3537; GFX10PLUS-NEXT: s_movrels_b32 s0, s4 3538; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 3539; GFX10PLUS-NEXT: ; return to shader part epilog 3540entry: 3541 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel 3542 ret float %ext 3543} 3544 3545define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) { 3546; GCN-LABEL: dyn_extract_v15f32_s_v: 3547; GCN: ; %bb.0: ; %entry 3548; GCN-NEXT: s_mov_b32 s0, s2 3549; GCN-NEXT: s_mov_b32 s1, s3 3550; GCN-NEXT: s_mov_b32 s2, s4 3551; GCN-NEXT: v_mov_b32_e32 v1, s0 3552; GCN-NEXT: v_mov_b32_e32 v2, s1 3553; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3554; GCN-NEXT: s_mov_b32 s3, s5 3555; GCN-NEXT: v_mov_b32_e32 v3, s2 3556; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3557; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 3558; GCN-NEXT: s_mov_b32 s4, s6 3559; GCN-NEXT: v_mov_b32_e32 v4, s3 3560; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 3561; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 3562; GCN-NEXT: s_mov_b32 s5, s7 3563; GCN-NEXT: v_mov_b32_e32 v5, s4 3564; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 3565; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 3566; GCN-NEXT: s_mov_b32 s6, s8 3567; GCN-NEXT: v_mov_b32_e32 v6, s5 3568; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 3569; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 3570; GCN-NEXT: s_mov_b32 s7, s9 3571; GCN-NEXT: v_mov_b32_e32 v7, s6 3572; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 3573; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 3574; GCN-NEXT: s_mov_b32 s8, s10 3575; GCN-NEXT: v_mov_b32_e32 v8, s7 3576; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 3577; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 3578; GCN-NEXT: s_mov_b32 s9, s11 3579; GCN-NEXT: v_mov_b32_e32 v9, s8 3580; GCN-NEXT: 
v_cndmask_b32_e32 v1, v1, v8, vcc 3581; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 3582; GCN-NEXT: s_mov_b32 s10, s12 3583; GCN-NEXT: v_mov_b32_e32 v10, s9 3584; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 3585; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 3586; GCN-NEXT: v_mov_b32_e32 v11, s10 3587; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 3588; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 3589; GCN-NEXT: v_mov_b32_e32 v12, s13 3590; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 3591; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 3592; GCN-NEXT: v_mov_b32_e32 v13, s14 3593; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc 3594; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 3595; GCN-NEXT: v_mov_b32_e32 v14, s15 3596; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 3597; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 3598; GCN-NEXT: v_mov_b32_e32 v15, s16 3599; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc 3600; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 3601; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc 3602; GCN-NEXT: ; return to shader part epilog 3603; 3604; GFX10PLUS-LABEL: dyn_extract_v15f32_s_v: 3605; GFX10PLUS: ; %bb.0: ; %entry 3606; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3607; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3608; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s1 3609; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3610; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3611; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3612; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3613; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 3614; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 3615; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3616; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3617; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3618; GFX10PLUS-NEXT: s_mov_b32 s8, s10 3619; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo 3620; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 3621; GFX10PLUS-NEXT: s_mov_b32 s9, s11 3622; GFX10PLUS-NEXT: s_mov_b32 s10, s12 3623; GFX10PLUS-NEXT: s_mov_b32 s11, s13 3624; GFX10PLUS-NEXT: s_mov_b32 s12, s14 3625; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo 
3626; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 3627; GFX10PLUS-NEXT: s_mov_b32 s13, s15 3628; GFX10PLUS-NEXT: s_mov_b32 s14, s16 3629; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 3630; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 3631; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo 3632; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 3633; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 3634; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 3635; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo 3636; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 3637; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 3638; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 3639; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s9, vcc_lo 3640; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 3641; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 3642; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 3643; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s11, vcc_lo 3644; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 3645; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo 3646; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 3647; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s13, vcc_lo 3648; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 3649; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo 3650; GFX10PLUS-NEXT: ; return to shader part epilog 3651entry: 3652 %ext = extractelement <15 x float> %vec, i32 %sel 3653 ret float %ext 3654} 3655 3656define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) { 3657; GCN-LABEL: dyn_extract_v15f32_v_v: 3658; GCN: ; %bb.0: ; %entry 3659; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3660; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3661; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3662; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3663; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3664; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 3665; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3666; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, 
v15 3667; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3668; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3669; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3670; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 3671; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3672; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3673; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3674; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3675; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3676; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3677; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3678; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3679; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3680; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3681; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3682; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3683; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3684; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3685; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3686; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3687; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 3688; GCN-NEXT: s_setpc_b64 s[30:31] 3689; 3690; GFX10PLUS-LABEL: dyn_extract_v15f32_v_v: 3691; GFX10PLUS: ; %bb.0: ; %entry 3692; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3693; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 3694; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 3695; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3696; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 3697; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3698; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 3699; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 3700; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 3701; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 3702; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 3703; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 3704; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 3705; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 3706; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 3707; 
GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3708; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 3709; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 3710; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 3711; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo 3712; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 3713; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 3714; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 3715; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo 3716; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 3717; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 3718; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 3719; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo 3720; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 3721; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 3722; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 3723entry: 3724 %ext = extractelement <15 x float> %vec, i32 %sel 3725 ret float %ext 3726} 3727 3728define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) { 3729; GPRIDX-LABEL: dyn_extract_v15f32_v_s: 3730; GPRIDX: ; %bb.0: ; %entry 3731; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) 3732; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 3733; GPRIDX-NEXT: s_set_gpr_idx_off 3734; GPRIDX-NEXT: ; return to shader part epilog 3735; 3736; MOVREL-LABEL: dyn_extract_v15f32_v_s: 3737; MOVREL: ; %bb.0: ; %entry 3738; MOVREL-NEXT: s_mov_b32 m0, s2 3739; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 3740; MOVREL-NEXT: ; return to shader part epilog 3741; 3742; GFX10PLUS-LABEL: dyn_extract_v15f32_v_s: 3743; GFX10PLUS: ; %bb.0: ; %entry 3744; GFX10PLUS-NEXT: s_mov_b32 m0, s2 3745; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0 3746; GFX10PLUS-NEXT: ; return to shader part epilog 3747entry: 3748 %ext = extractelement <15 x float> %vec, i32 %sel 3749 ret float %ext 3750} 3751 3752define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) { 3753; GCN-LABEL: 
dyn_extract_v15f32_s_s: 3754; GCN: ; %bb.0: ; %entry 3755; GCN-NEXT: s_mov_b32 s0, s2 3756; GCN-NEXT: s_mov_b32 m0, s17 3757; GCN-NEXT: s_mov_b32 s1, s3 3758; GCN-NEXT: s_mov_b32 s2, s4 3759; GCN-NEXT: s_mov_b32 s3, s5 3760; GCN-NEXT: s_mov_b32 s4, s6 3761; GCN-NEXT: s_mov_b32 s5, s7 3762; GCN-NEXT: s_mov_b32 s6, s8 3763; GCN-NEXT: s_mov_b32 s7, s9 3764; GCN-NEXT: s_mov_b32 s8, s10 3765; GCN-NEXT: s_mov_b32 s9, s11 3766; GCN-NEXT: s_mov_b32 s10, s12 3767; GCN-NEXT: s_mov_b32 s11, s13 3768; GCN-NEXT: s_mov_b32 s12, s14 3769; GCN-NEXT: s_mov_b32 s13, s15 3770; GCN-NEXT: s_mov_b32 s14, s16 3771; GCN-NEXT: s_movrels_b32 s0, s0 3772; GCN-NEXT: v_mov_b32_e32 v0, s0 3773; GCN-NEXT: ; return to shader part epilog 3774; 3775; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s: 3776; GFX10PLUS: ; %bb.0: ; %entry 3777; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3778; GFX10PLUS-NEXT: s_mov_b32 m0, s17 3779; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3780; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3781; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3782; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3783; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3784; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3785; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3786; GFX10PLUS-NEXT: s_mov_b32 s8, s10 3787; GFX10PLUS-NEXT: s_mov_b32 s9, s11 3788; GFX10PLUS-NEXT: s_mov_b32 s10, s12 3789; GFX10PLUS-NEXT: s_mov_b32 s11, s13 3790; GFX10PLUS-NEXT: s_mov_b32 s12, s14 3791; GFX10PLUS-NEXT: s_mov_b32 s13, s15 3792; GFX10PLUS-NEXT: s_mov_b32 s14, s16 3793; GFX10PLUS-NEXT: s_movrels_b32 s0, s0 3794; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 3795; GFX10PLUS-NEXT: ; return to shader part epilog 3796entry: 3797 %ext = extractelement <15 x float> %vec, i32 %sel 3798 ret float %ext 3799} 3800 3801define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) { 3802; GCN-LABEL: dyn_extract_v15f32_s_s_offset3: 3803; GCN: ; %bb.0: ; %entry 3804; GCN-NEXT: s_mov_b32 s0, s2 3805; GCN-NEXT: s_mov_b32 s1, s3 3806; GCN-NEXT: s_mov_b32 s3, s5 3807; GCN-NEXT: s_mov_b32 m0, s17 3808; 
GCN-NEXT: s_mov_b32 s2, s4 3809; GCN-NEXT: s_mov_b32 s4, s6 3810; GCN-NEXT: s_mov_b32 s5, s7 3811; GCN-NEXT: s_mov_b32 s6, s8 3812; GCN-NEXT: s_mov_b32 s7, s9 3813; GCN-NEXT: s_mov_b32 s8, s10 3814; GCN-NEXT: s_mov_b32 s9, s11 3815; GCN-NEXT: s_mov_b32 s10, s12 3816; GCN-NEXT: s_mov_b32 s11, s13 3817; GCN-NEXT: s_mov_b32 s12, s14 3818; GCN-NEXT: s_mov_b32 s13, s15 3819; GCN-NEXT: s_mov_b32 s14, s16 3820; GCN-NEXT: s_movrels_b32 s0, s3 3821; GCN-NEXT: v_mov_b32_e32 v0, s0 3822; GCN-NEXT: ; return to shader part epilog 3823; 3824; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s_offset3: 3825; GFX10PLUS: ; %bb.0: ; %entry 3826; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3827; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3828; GFX10PLUS-NEXT: s_mov_b32 m0, s17 3829; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3830; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3831; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3832; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3833; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3834; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3835; GFX10PLUS-NEXT: s_mov_b32 s8, s10 3836; GFX10PLUS-NEXT: s_mov_b32 s9, s11 3837; GFX10PLUS-NEXT: s_mov_b32 s10, s12 3838; GFX10PLUS-NEXT: s_mov_b32 s11, s13 3839; GFX10PLUS-NEXT: s_mov_b32 s12, s14 3840; GFX10PLUS-NEXT: s_mov_b32 s13, s15 3841; GFX10PLUS-NEXT: s_mov_b32 s14, s16 3842; GFX10PLUS-NEXT: s_movrels_b32 s0, s3 3843; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 3844; GFX10PLUS-NEXT: ; return to shader part epilog 3845entry: 3846 %add = add i32 %sel, 3 3847 %ext = extractelement <15 x float> %vec, i32 %add 3848 ret float %ext 3849} 3850 3851define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) { 3852; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3: 3853; GPRIDX: ; %bb.0: ; %entry 3854; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3855; GPRIDX-NEXT: v_add_u32_e32 v15, 3, v15 3856; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3857; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3858; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3859; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3860; 
GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 3861; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3862; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 3863; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3864; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3865; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3866; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 3867; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3868; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3869; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3870; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3871; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3872; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3873; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3874; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3875; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3876; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3877; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3878; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3879; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3880; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3881; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3882; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3883; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 3884; GPRIDX-NEXT: s_setpc_b64 s[30:31] 3885; 3886; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3: 3887; MOVREL: ; %bb.0: ; %entry 3888; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3889; MOVREL-NEXT: v_add_u32_e32 v15, vcc, 3, v15 3890; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3891; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3892; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3893; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3894; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 3895; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3896; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 3897; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3898; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3899; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3900; MOVREL-NEXT: v_cmp_eq_u32_e32 
vcc, 6, v15 3901; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3902; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3903; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3904; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3905; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3906; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3907; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3908; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3909; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3910; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3911; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3912; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3913; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3914; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3915; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3916; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3917; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 3918; MOVREL-NEXT: s_setpc_b64 s[30:31] 3919; 3920; GFX10PLUS-LABEL: dyn_extract_v15f32_v_v_offset3: 3921; GFX10PLUS: ; %bb.0: ; %entry 3922; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3923; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 3924; GFX10PLUS-NEXT: v_add_nc_u32_e32 v15, 3, v15 3925; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 3926; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3927; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 3928; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3929; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 3930; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 3931; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 3932; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 3933; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 3934; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 3935; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 3936; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 3937; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 3938; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3939; GFX10PLUS-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 8, v15 3940; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 3941; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 3942; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo 3943; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 3944; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 3945; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 3946; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo 3947; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 3948; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 3949; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 3950; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo 3951; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 3952; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 3953; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 3954entry: 3955 %add = add i32 %sel, 3 3956 %ext = extractelement <15 x float> %vec, i32 %add 3957 ret float %ext 3958} 3959 3960define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32 %sel) { 3961; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s: 3962; GPRIDX: .amd_kernel_code_t 3963; GPRIDX-NEXT: amd_code_version_major = 1 3964; GPRIDX-NEXT: amd_code_version_minor = 2 3965; GPRIDX-NEXT: amd_machine_kind = 1 3966; GPRIDX-NEXT: amd_machine_version_major = 9 3967; GPRIDX-NEXT: amd_machine_version_minor = 0 3968; GPRIDX-NEXT: amd_machine_version_stepping = 0 3969; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 3970; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 3971; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 3972; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 0 3973; GPRIDX-NEXT: priority = 0 3974; GPRIDX-NEXT: float_mode = 240 3975; GPRIDX-NEXT: priv = 0 3976; GPRIDX-NEXT: enable_dx10_clamp = 1 3977; GPRIDX-NEXT: debug_mode = 0 3978; GPRIDX-NEXT: enable_ieee_mode = 1 3979; GPRIDX-NEXT: enable_wgp_mode = 0 3980; GPRIDX-NEXT: enable_mem_ordered = 0 3981; GPRIDX-NEXT: enable_fwd_progress = 0 3982; GPRIDX-NEXT: 
enable_sgpr_private_segment_wave_byte_offset = 0 3983; GPRIDX-NEXT: user_sgpr_count = 6 3984; GPRIDX-NEXT: enable_trap_handler = 0 3985; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 3986; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 3987; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 3988; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 3989; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 3990; GPRIDX-NEXT: enable_exception_msb = 0 3991; GPRIDX-NEXT: granulated_lds_size = 0 3992; GPRIDX-NEXT: enable_exception = 0 3993; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 3994; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 3995; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 3996; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3997; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 3998; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 3999; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 4000; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4001; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4002; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4003; GPRIDX-NEXT: enable_wavefront_size32 = 0 4004; GPRIDX-NEXT: enable_ordered_append_gds = 0 4005; GPRIDX-NEXT: private_element_size = 1 4006; GPRIDX-NEXT: is_ptr64 = 1 4007; GPRIDX-NEXT: is_dynamic_callstack = 0 4008; GPRIDX-NEXT: is_debug_enabled = 0 4009; GPRIDX-NEXT: is_xnack_enabled = 1 4010; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 4011; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 4012; GPRIDX-NEXT: gds_segment_byte_size = 0 4013; GPRIDX-NEXT: kernarg_segment_byte_size = 12 4014; GPRIDX-NEXT: workgroup_fbarrier_count = 0 4015; GPRIDX-NEXT: wavefront_sgpr_count = 6 4016; GPRIDX-NEXT: workitem_vgpr_count = 2 4017; GPRIDX-NEXT: reserved_vgpr_first = 0 4018; GPRIDX-NEXT: reserved_vgpr_count = 0 4019; GPRIDX-NEXT: reserved_sgpr_first = 0 4020; GPRIDX-NEXT: reserved_sgpr_count = 0 4021; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4022; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 4023; GPRIDX-NEXT: 
kernarg_segment_alignment = 4 4024; GPRIDX-NEXT: group_segment_alignment = 4 4025; GPRIDX-NEXT: private_segment_alignment = 4 4026; GPRIDX-NEXT: wavefront_size = 6 4027; GPRIDX-NEXT: call_convention = -1 4028; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 4029; GPRIDX-NEXT: .end_amd_kernel_code_t 4030; GPRIDX-NEXT: ; %bb.0: ; %entry 4031; GPRIDX-NEXT: s_load_dword s2, s[4:5], 0x8 4032; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4033; GPRIDX-NEXT: v_mov_b32_e32 v1, 0 4034; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 4035; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1 4036; GPRIDX-NEXT: s_cselect_b32 s3, 2.0, 1.0 4037; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2 4038; GPRIDX-NEXT: s_cselect_b32 s3, 0x40400000, s3 4039; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3 4040; GPRIDX-NEXT: s_cselect_b32 s2, 4.0, s3 4041; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 4042; GPRIDX-NEXT: global_store_dword v1, v0, s[0:1] 4043; GPRIDX-NEXT: s_endpgm 4044; 4045; MOVREL-LABEL: dyn_extract_v4f32_s_s_s: 4046; MOVREL: .amd_kernel_code_t 4047; MOVREL-NEXT: amd_code_version_major = 1 4048; MOVREL-NEXT: amd_code_version_minor = 2 4049; MOVREL-NEXT: amd_machine_kind = 1 4050; MOVREL-NEXT: amd_machine_version_major = 8 4051; MOVREL-NEXT: amd_machine_version_minor = 0 4052; MOVREL-NEXT: amd_machine_version_stepping = 3 4053; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 4054; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 4055; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 4056; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0 4057; MOVREL-NEXT: priority = 0 4058; MOVREL-NEXT: float_mode = 240 4059; MOVREL-NEXT: priv = 0 4060; MOVREL-NEXT: enable_dx10_clamp = 1 4061; MOVREL-NEXT: debug_mode = 0 4062; MOVREL-NEXT: enable_ieee_mode = 1 4063; MOVREL-NEXT: enable_wgp_mode = 0 4064; MOVREL-NEXT: enable_mem_ordered = 0 4065; MOVREL-NEXT: enable_fwd_progress = 0 4066; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4067; MOVREL-NEXT: user_sgpr_count = 6 4068; MOVREL-NEXT: enable_trap_handler = 0 4069; MOVREL-NEXT: 
enable_sgpr_workgroup_id_x = 1 4070; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 4071; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 4072; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 4073; MOVREL-NEXT: enable_vgpr_workitem_id = 0 4074; MOVREL-NEXT: enable_exception_msb = 0 4075; MOVREL-NEXT: granulated_lds_size = 0 4076; MOVREL-NEXT: enable_exception = 0 4077; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 4078; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 4079; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 4080; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4081; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 4082; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 4083; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 4084; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4085; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4086; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4087; MOVREL-NEXT: enable_wavefront_size32 = 0 4088; MOVREL-NEXT: enable_ordered_append_gds = 0 4089; MOVREL-NEXT: private_element_size = 1 4090; MOVREL-NEXT: is_ptr64 = 1 4091; MOVREL-NEXT: is_dynamic_callstack = 0 4092; MOVREL-NEXT: is_debug_enabled = 0 4093; MOVREL-NEXT: is_xnack_enabled = 0 4094; MOVREL-NEXT: workitem_private_segment_byte_size = 0 4095; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 4096; MOVREL-NEXT: gds_segment_byte_size = 0 4097; MOVREL-NEXT: kernarg_segment_byte_size = 12 4098; MOVREL-NEXT: workgroup_fbarrier_count = 0 4099; MOVREL-NEXT: wavefront_sgpr_count = 6 4100; MOVREL-NEXT: workitem_vgpr_count = 3 4101; MOVREL-NEXT: reserved_vgpr_first = 0 4102; MOVREL-NEXT: reserved_vgpr_count = 0 4103; MOVREL-NEXT: reserved_sgpr_first = 0 4104; MOVREL-NEXT: reserved_sgpr_count = 0 4105; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4106; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 4107; MOVREL-NEXT: kernarg_segment_alignment = 4 4108; MOVREL-NEXT: group_segment_alignment = 4 4109; MOVREL-NEXT: private_segment_alignment = 4 4110; MOVREL-NEXT: 
wavefront_size = 6 4111; MOVREL-NEXT: call_convention = -1 4112; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 4113; MOVREL-NEXT: .end_amd_kernel_code_t 4114; MOVREL-NEXT: ; %bb.0: ; %entry 4115; MOVREL-NEXT: s_load_dword s2, s[4:5], 0x8 4116; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4117; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 4118; MOVREL-NEXT: s_cmp_eq_u32 s2, 1 4119; MOVREL-NEXT: s_cselect_b32 s3, 2.0, 1.0 4120; MOVREL-NEXT: s_cmp_eq_u32 s2, 2 4121; MOVREL-NEXT: s_cselect_b32 s3, 0x40400000, s3 4122; MOVREL-NEXT: s_cmp_eq_u32 s2, 3 4123; MOVREL-NEXT: s_cselect_b32 s2, 4.0, s3 4124; MOVREL-NEXT: v_mov_b32_e32 v0, s0 4125; MOVREL-NEXT: v_mov_b32_e32 v2, s2 4126; MOVREL-NEXT: v_mov_b32_e32 v1, s1 4127; MOVREL-NEXT: flat_store_dword v[0:1], v2 4128; MOVREL-NEXT: s_endpgm 4129; 4130; GFX10-LABEL: dyn_extract_v4f32_s_s_s: 4131; GFX10: .amd_kernel_code_t 4132; GFX10-NEXT: amd_code_version_major = 1 4133; GFX10-NEXT: amd_code_version_minor = 2 4134; GFX10-NEXT: amd_machine_kind = 1 4135; GFX10-NEXT: amd_machine_version_major = 10 4136; GFX10-NEXT: amd_machine_version_minor = 1 4137; GFX10-NEXT: amd_machine_version_stepping = 0 4138; GFX10-NEXT: kernel_code_entry_byte_offset = 256 4139; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 4140; GFX10-NEXT: granulated_workitem_vgpr_count = 0 4141; GFX10-NEXT: granulated_wavefront_sgpr_count = 0 4142; GFX10-NEXT: priority = 0 4143; GFX10-NEXT: float_mode = 240 4144; GFX10-NEXT: priv = 0 4145; GFX10-NEXT: enable_dx10_clamp = 1 4146; GFX10-NEXT: debug_mode = 0 4147; GFX10-NEXT: enable_ieee_mode = 1 4148; GFX10-NEXT: enable_wgp_mode = 1 4149; GFX10-NEXT: enable_mem_ordered = 1 4150; GFX10-NEXT: enable_fwd_progress = 0 4151; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4152; GFX10-NEXT: user_sgpr_count = 6 4153; GFX10-NEXT: enable_trap_handler = 0 4154; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 4155; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0 4156; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0 4157; GFX10-NEXT: 
enable_sgpr_workgroup_info = 0 4158; GFX10-NEXT: enable_vgpr_workitem_id = 0 4159; GFX10-NEXT: enable_exception_msb = 0 4160; GFX10-NEXT: granulated_lds_size = 0 4161; GFX10-NEXT: enable_exception = 0 4162; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 4163; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0 4164; GFX10-NEXT: enable_sgpr_queue_ptr = 0 4165; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4166; GFX10-NEXT: enable_sgpr_dispatch_id = 0 4167; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 4168; GFX10-NEXT: enable_sgpr_private_segment_size = 0 4169; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4170; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4171; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4172; GFX10-NEXT: enable_wavefront_size32 = 1 4173; GFX10-NEXT: enable_ordered_append_gds = 0 4174; GFX10-NEXT: private_element_size = 1 4175; GFX10-NEXT: is_ptr64 = 1 4176; GFX10-NEXT: is_dynamic_callstack = 0 4177; GFX10-NEXT: is_debug_enabled = 0 4178; GFX10-NEXT: is_xnack_enabled = 1 4179; GFX10-NEXT: workitem_private_segment_byte_size = 0 4180; GFX10-NEXT: workgroup_group_segment_byte_size = 0 4181; GFX10-NEXT: gds_segment_byte_size = 0 4182; GFX10-NEXT: kernarg_segment_byte_size = 12 4183; GFX10-NEXT: workgroup_fbarrier_count = 0 4184; GFX10-NEXT: wavefront_sgpr_count = 6 4185; GFX10-NEXT: workitem_vgpr_count = 2 4186; GFX10-NEXT: reserved_vgpr_first = 0 4187; GFX10-NEXT: reserved_vgpr_count = 0 4188; GFX10-NEXT: reserved_sgpr_first = 0 4189; GFX10-NEXT: reserved_sgpr_count = 0 4190; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4191; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 4192; GFX10-NEXT: kernarg_segment_alignment = 4 4193; GFX10-NEXT: group_segment_alignment = 4 4194; GFX10-NEXT: private_segment_alignment = 4 4195; GFX10-NEXT: wavefront_size = 5 4196; GFX10-NEXT: call_convention = -1 4197; GFX10-NEXT: runtime_loader_kernel_symbol = 0 4198; GFX10-NEXT: .end_amd_kernel_code_t 4199; GFX10-NEXT: ; %bb.0: ; %entry 4200; 
GFX10-NEXT: s_clause 0x1 4201; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 4202; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4203; GFX10-NEXT: v_mov_b32_e32 v1, 0 4204; GFX10-NEXT: s_waitcnt lgkmcnt(0) 4205; GFX10-NEXT: s_cmp_eq_u32 s2, 1 4206; GFX10-NEXT: s_cselect_b32 s3, 2.0, 1.0 4207; GFX10-NEXT: s_cmp_eq_u32 s2, 2 4208; GFX10-NEXT: s_cselect_b32 s3, 0x40400000, s3 4209; GFX10-NEXT: s_cmp_eq_u32 s2, 3 4210; GFX10-NEXT: s_cselect_b32 s2, 4.0, s3 4211; GFX10-NEXT: v_mov_b32_e32 v0, s2 4212; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 4213; GFX10-NEXT: s_endpgm 4214; 4215; GFX11-LABEL: dyn_extract_v4f32_s_s_s: 4216; GFX11: .amd_kernel_code_t 4217; GFX11-NEXT: amd_code_version_major = 1 4218; GFX11-NEXT: amd_code_version_minor = 2 4219; GFX11-NEXT: amd_machine_kind = 1 4220; GFX11-NEXT: amd_machine_version_major = 11 4221; GFX11-NEXT: amd_machine_version_minor = 0 4222; GFX11-NEXT: amd_machine_version_stepping = 0 4223; GFX11-NEXT: kernel_code_entry_byte_offset = 256 4224; GFX11-NEXT: kernel_code_prefetch_byte_size = 0 4225; GFX11-NEXT: granulated_workitem_vgpr_count = 0 4226; GFX11-NEXT: granulated_wavefront_sgpr_count = 0 4227; GFX11-NEXT: priority = 0 4228; GFX11-NEXT: float_mode = 240 4229; GFX11-NEXT: priv = 0 4230; GFX11-NEXT: enable_dx10_clamp = 1 4231; GFX11-NEXT: debug_mode = 0 4232; GFX11-NEXT: enable_ieee_mode = 1 4233; GFX11-NEXT: enable_wgp_mode = 1 4234; GFX11-NEXT: enable_mem_ordered = 1 4235; GFX11-NEXT: enable_fwd_progress = 0 4236; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4237; GFX11-NEXT: user_sgpr_count = 15 4238; GFX11-NEXT: enable_trap_handler = 0 4239; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1 4240; GFX11-NEXT: enable_sgpr_workgroup_id_y = 0 4241; GFX11-NEXT: enable_sgpr_workgroup_id_z = 0 4242; GFX11-NEXT: enable_sgpr_workgroup_info = 0 4243; GFX11-NEXT: enable_vgpr_workitem_id = 0 4244; GFX11-NEXT: enable_exception_msb = 0 4245; GFX11-NEXT: granulated_lds_size = 0 4246; GFX11-NEXT: enable_exception = 0 4247; 
GFX11-NEXT: enable_sgpr_private_segment_buffer = 0 4248; GFX11-NEXT: enable_sgpr_dispatch_ptr = 0 4249; GFX11-NEXT: enable_sgpr_queue_ptr = 0 4250; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4251; GFX11-NEXT: enable_sgpr_dispatch_id = 0 4252; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0 4253; GFX11-NEXT: enable_sgpr_private_segment_size = 0 4254; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4255; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4256; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4257; GFX11-NEXT: enable_wavefront_size32 = 1 4258; GFX11-NEXT: enable_ordered_append_gds = 0 4259; GFX11-NEXT: private_element_size = 1 4260; GFX11-NEXT: is_ptr64 = 1 4261; GFX11-NEXT: is_dynamic_callstack = 0 4262; GFX11-NEXT: is_debug_enabled = 0 4263; GFX11-NEXT: is_xnack_enabled = 0 4264; GFX11-NEXT: workitem_private_segment_byte_size = 0 4265; GFX11-NEXT: workgroup_group_segment_byte_size = 0 4266; GFX11-NEXT: gds_segment_byte_size = 0 4267; GFX11-NEXT: kernarg_segment_byte_size = 12 4268; GFX11-NEXT: workgroup_fbarrier_count = 0 4269; GFX11-NEXT: wavefront_sgpr_count = 4 4270; GFX11-NEXT: workitem_vgpr_count = 2 4271; GFX11-NEXT: reserved_vgpr_first = 0 4272; GFX11-NEXT: reserved_vgpr_count = 0 4273; GFX11-NEXT: reserved_sgpr_first = 0 4274; GFX11-NEXT: reserved_sgpr_count = 0 4275; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4276; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0 4277; GFX11-NEXT: kernarg_segment_alignment = 4 4278; GFX11-NEXT: group_segment_alignment = 4 4279; GFX11-NEXT: private_segment_alignment = 4 4280; GFX11-NEXT: wavefront_size = 5 4281; GFX11-NEXT: call_convention = -1 4282; GFX11-NEXT: runtime_loader_kernel_symbol = 0 4283; GFX11-NEXT: .end_amd_kernel_code_t 4284; GFX11-NEXT: ; %bb.0: ; %entry 4285; GFX11-NEXT: s_clause 0x1 4286; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 4287; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 4288; GFX11-NEXT: v_mov_b32_e32 v1, 0 4289; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4290; 
GFX11-NEXT: s_cmp_eq_u32 s2, 1 4291; GFX11-NEXT: s_cselect_b32 s3, 2.0, 1.0 4292; GFX11-NEXT: s_cmp_eq_u32 s2, 2 4293; GFX11-NEXT: s_cselect_b32 s3, 0x40400000, s3 4294; GFX11-NEXT: s_cmp_eq_u32 s2, 3 4295; GFX11-NEXT: s_cselect_b32 s2, 4.0, s3 4296; GFX11-NEXT: v_mov_b32_e32 v0, s2 4297; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 4298; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 4299; GFX11-NEXT: s_endpgm 4300entry: 4301 %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel 4302 store float %ext, float addrspace(1)* %out 4303 ret void 4304} 4305 4306define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i32 %sel) { 4307; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s: 4308; GPRIDX: .amd_kernel_code_t 4309; GPRIDX-NEXT: amd_code_version_major = 1 4310; GPRIDX-NEXT: amd_code_version_minor = 2 4311; GPRIDX-NEXT: amd_machine_kind = 1 4312; GPRIDX-NEXT: amd_machine_version_major = 9 4313; GPRIDX-NEXT: amd_machine_version_minor = 0 4314; GPRIDX-NEXT: amd_machine_version_stepping = 0 4315; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 4316; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 4317; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 4318; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 0 4319; GPRIDX-NEXT: priority = 0 4320; GPRIDX-NEXT: float_mode = 240 4321; GPRIDX-NEXT: priv = 0 4322; GPRIDX-NEXT: enable_dx10_clamp = 1 4323; GPRIDX-NEXT: debug_mode = 0 4324; GPRIDX-NEXT: enable_ieee_mode = 1 4325; GPRIDX-NEXT: enable_wgp_mode = 0 4326; GPRIDX-NEXT: enable_mem_ordered = 0 4327; GPRIDX-NEXT: enable_fwd_progress = 0 4328; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4329; GPRIDX-NEXT: user_sgpr_count = 6 4330; GPRIDX-NEXT: enable_trap_handler = 0 4331; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 4332; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 4333; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 4334; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 4335; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 
4336; GPRIDX-NEXT: enable_exception_msb = 0 4337; GPRIDX-NEXT: granulated_lds_size = 0 4338; GPRIDX-NEXT: enable_exception = 0 4339; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 4340; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 4341; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 4342; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4343; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 4344; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 4345; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 4346; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4347; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4348; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4349; GPRIDX-NEXT: enable_wavefront_size32 = 0 4350; GPRIDX-NEXT: enable_ordered_append_gds = 0 4351; GPRIDX-NEXT: private_element_size = 1 4352; GPRIDX-NEXT: is_ptr64 = 1 4353; GPRIDX-NEXT: is_dynamic_callstack = 0 4354; GPRIDX-NEXT: is_debug_enabled = 0 4355; GPRIDX-NEXT: is_xnack_enabled = 1 4356; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 4357; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 4358; GPRIDX-NEXT: gds_segment_byte_size = 0 4359; GPRIDX-NEXT: kernarg_segment_byte_size = 12 4360; GPRIDX-NEXT: workgroup_fbarrier_count = 0 4361; GPRIDX-NEXT: wavefront_sgpr_count = 7 4362; GPRIDX-NEXT: workitem_vgpr_count = 3 4363; GPRIDX-NEXT: reserved_vgpr_first = 0 4364; GPRIDX-NEXT: reserved_vgpr_count = 0 4365; GPRIDX-NEXT: reserved_sgpr_first = 0 4366; GPRIDX-NEXT: reserved_sgpr_count = 0 4367; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4368; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 4369; GPRIDX-NEXT: kernarg_segment_alignment = 4 4370; GPRIDX-NEXT: group_segment_alignment = 4 4371; GPRIDX-NEXT: private_segment_alignment = 4 4372; GPRIDX-NEXT: wavefront_size = 6 4373; GPRIDX-NEXT: call_convention = -1 4374; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 4375; GPRIDX-NEXT: .end_amd_kernel_code_t 4376; GPRIDX-NEXT: ; %bb.0: ; %entry 4377; GPRIDX-NEXT: s_load_dword s6, s[4:5], 
0x8 4378; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4379; GPRIDX-NEXT: s_mov_b32 s2, 0 4380; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000 4381; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 4382; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 4383; GPRIDX-NEXT: s_cmp_eq_u32 s6, 1 4384; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 4385; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2 4386; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 4387; GPRIDX-NEXT: s_cmp_eq_u32 s6, 3 4388; GPRIDX-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 4389; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 4390; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 4391; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 4392; GPRIDX-NEXT: s_endpgm 4393; 4394; MOVREL-LABEL: dyn_extract_v4f64_s_s_s: 4395; MOVREL: .amd_kernel_code_t 4396; MOVREL-NEXT: amd_code_version_major = 1 4397; MOVREL-NEXT: amd_code_version_minor = 2 4398; MOVREL-NEXT: amd_machine_kind = 1 4399; MOVREL-NEXT: amd_machine_version_major = 8 4400; MOVREL-NEXT: amd_machine_version_minor = 0 4401; MOVREL-NEXT: amd_machine_version_stepping = 3 4402; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 4403; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 4404; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 4405; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0 4406; MOVREL-NEXT: priority = 0 4407; MOVREL-NEXT: float_mode = 240 4408; MOVREL-NEXT: priv = 0 4409; MOVREL-NEXT: enable_dx10_clamp = 1 4410; MOVREL-NEXT: debug_mode = 0 4411; MOVREL-NEXT: enable_ieee_mode = 1 4412; MOVREL-NEXT: enable_wgp_mode = 0 4413; MOVREL-NEXT: enable_mem_ordered = 0 4414; MOVREL-NEXT: enable_fwd_progress = 0 4415; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4416; MOVREL-NEXT: user_sgpr_count = 6 4417; MOVREL-NEXT: enable_trap_handler = 0 4418; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 4419; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 4420; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 4421; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 4422; MOVREL-NEXT: enable_vgpr_workitem_id = 0 4423; MOVREL-NEXT: 
enable_exception_msb = 0 4424; MOVREL-NEXT: granulated_lds_size = 0 4425; MOVREL-NEXT: enable_exception = 0 4426; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 4427; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 4428; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 4429; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4430; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 4431; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 4432; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 4433; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4434; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4435; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4436; MOVREL-NEXT: enable_wavefront_size32 = 0 4437; MOVREL-NEXT: enable_ordered_append_gds = 0 4438; MOVREL-NEXT: private_element_size = 1 4439; MOVREL-NEXT: is_ptr64 = 1 4440; MOVREL-NEXT: is_dynamic_callstack = 0 4441; MOVREL-NEXT: is_debug_enabled = 0 4442; MOVREL-NEXT: is_xnack_enabled = 0 4443; MOVREL-NEXT: workitem_private_segment_byte_size = 0 4444; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 4445; MOVREL-NEXT: gds_segment_byte_size = 0 4446; MOVREL-NEXT: kernarg_segment_byte_size = 12 4447; MOVREL-NEXT: workgroup_fbarrier_count = 0 4448; MOVREL-NEXT: wavefront_sgpr_count = 7 4449; MOVREL-NEXT: workitem_vgpr_count = 4 4450; MOVREL-NEXT: reserved_vgpr_first = 0 4451; MOVREL-NEXT: reserved_vgpr_count = 0 4452; MOVREL-NEXT: reserved_sgpr_first = 0 4453; MOVREL-NEXT: reserved_sgpr_count = 0 4454; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4455; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 4456; MOVREL-NEXT: kernarg_segment_alignment = 4 4457; MOVREL-NEXT: group_segment_alignment = 4 4458; MOVREL-NEXT: private_segment_alignment = 4 4459; MOVREL-NEXT: wavefront_size = 6 4460; MOVREL-NEXT: call_convention = -1 4461; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 4462; MOVREL-NEXT: .end_amd_kernel_code_t 4463; MOVREL-NEXT: ; %bb.0: ; %entry 4464; MOVREL-NEXT: s_load_dword s6, s[4:5], 0x8 4465; 
MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4466; MOVREL-NEXT: s_mov_b32 s2, 0 4467; MOVREL-NEXT: s_mov_b32 s3, 0x40080000 4468; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 4469; MOVREL-NEXT: s_cmp_eq_u32 s6, 1 4470; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 4471; MOVREL-NEXT: s_cmp_eq_u32 s6, 2 4472; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 4473; MOVREL-NEXT: s_cmp_eq_u32 s6, 3 4474; MOVREL-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 4475; MOVREL-NEXT: v_mov_b32_e32 v0, s2 4476; MOVREL-NEXT: v_mov_b32_e32 v3, s1 4477; MOVREL-NEXT: v_mov_b32_e32 v1, s3 4478; MOVREL-NEXT: v_mov_b32_e32 v2, s0 4479; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 4480; MOVREL-NEXT: s_endpgm 4481; 4482; GFX10-LABEL: dyn_extract_v4f64_s_s_s: 4483; GFX10: .amd_kernel_code_t 4484; GFX10-NEXT: amd_code_version_major = 1 4485; GFX10-NEXT: amd_code_version_minor = 2 4486; GFX10-NEXT: amd_machine_kind = 1 4487; GFX10-NEXT: amd_machine_version_major = 10 4488; GFX10-NEXT: amd_machine_version_minor = 1 4489; GFX10-NEXT: amd_machine_version_stepping = 0 4490; GFX10-NEXT: kernel_code_entry_byte_offset = 256 4491; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 4492; GFX10-NEXT: granulated_workitem_vgpr_count = 0 4493; GFX10-NEXT: granulated_wavefront_sgpr_count = 0 4494; GFX10-NEXT: priority = 0 4495; GFX10-NEXT: float_mode = 240 4496; GFX10-NEXT: priv = 0 4497; GFX10-NEXT: enable_dx10_clamp = 1 4498; GFX10-NEXT: debug_mode = 0 4499; GFX10-NEXT: enable_ieee_mode = 1 4500; GFX10-NEXT: enable_wgp_mode = 1 4501; GFX10-NEXT: enable_mem_ordered = 1 4502; GFX10-NEXT: enable_fwd_progress = 0 4503; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4504; GFX10-NEXT: user_sgpr_count = 6 4505; GFX10-NEXT: enable_trap_handler = 0 4506; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 4507; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0 4508; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0 4509; GFX10-NEXT: enable_sgpr_workgroup_info = 0 4510; GFX10-NEXT: enable_vgpr_workitem_id = 0 4511; GFX10-NEXT: 
enable_exception_msb = 0 4512; GFX10-NEXT: granulated_lds_size = 0 4513; GFX10-NEXT: enable_exception = 0 4514; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 4515; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0 4516; GFX10-NEXT: enable_sgpr_queue_ptr = 0 4517; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4518; GFX10-NEXT: enable_sgpr_dispatch_id = 0 4519; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 4520; GFX10-NEXT: enable_sgpr_private_segment_size = 0 4521; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4522; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4523; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4524; GFX10-NEXT: enable_wavefront_size32 = 1 4525; GFX10-NEXT: enable_ordered_append_gds = 0 4526; GFX10-NEXT: private_element_size = 1 4527; GFX10-NEXT: is_ptr64 = 1 4528; GFX10-NEXT: is_dynamic_callstack = 0 4529; GFX10-NEXT: is_debug_enabled = 0 4530; GFX10-NEXT: is_xnack_enabled = 1 4531; GFX10-NEXT: workitem_private_segment_byte_size = 0 4532; GFX10-NEXT: workgroup_group_segment_byte_size = 0 4533; GFX10-NEXT: gds_segment_byte_size = 0 4534; GFX10-NEXT: kernarg_segment_byte_size = 12 4535; GFX10-NEXT: workgroup_fbarrier_count = 0 4536; GFX10-NEXT: wavefront_sgpr_count = 7 4537; GFX10-NEXT: workitem_vgpr_count = 3 4538; GFX10-NEXT: reserved_vgpr_first = 0 4539; GFX10-NEXT: reserved_vgpr_count = 0 4540; GFX10-NEXT: reserved_sgpr_first = 0 4541; GFX10-NEXT: reserved_sgpr_count = 0 4542; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4543; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 4544; GFX10-NEXT: kernarg_segment_alignment = 4 4545; GFX10-NEXT: group_segment_alignment = 4 4546; GFX10-NEXT: private_segment_alignment = 4 4547; GFX10-NEXT: wavefront_size = 5 4548; GFX10-NEXT: call_convention = -1 4549; GFX10-NEXT: runtime_loader_kernel_symbol = 0 4550; GFX10-NEXT: .end_amd_kernel_code_t 4551; GFX10-NEXT: ; %bb.0: ; %entry 4552; GFX10-NEXT: s_clause 0x1 4553; GFX10-NEXT: s_load_dword s6, s[4:5], 0x8 4554; GFX10-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x0 4555; GFX10-NEXT: s_mov_b32 s2, 0 4556; GFX10-NEXT: s_mov_b32 s3, 0x40080000 4557; GFX10-NEXT: v_mov_b32_e32 v2, 0 4558; GFX10-NEXT: s_waitcnt lgkmcnt(0) 4559; GFX10-NEXT: s_cmp_eq_u32 s6, 1 4560; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 4561; GFX10-NEXT: s_cmp_eq_u32 s6, 2 4562; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 4563; GFX10-NEXT: s_cmp_eq_u32 s6, 3 4564; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 4565; GFX10-NEXT: v_mov_b32_e32 v0, s2 4566; GFX10-NEXT: v_mov_b32_e32 v1, s3 4567; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 4568; GFX10-NEXT: s_endpgm 4569; 4570; GFX11-LABEL: dyn_extract_v4f64_s_s_s: 4571; GFX11: .amd_kernel_code_t 4572; GFX11-NEXT: amd_code_version_major = 1 4573; GFX11-NEXT: amd_code_version_minor = 2 4574; GFX11-NEXT: amd_machine_kind = 1 4575; GFX11-NEXT: amd_machine_version_major = 11 4576; GFX11-NEXT: amd_machine_version_minor = 0 4577; GFX11-NEXT: amd_machine_version_stepping = 0 4578; GFX11-NEXT: kernel_code_entry_byte_offset = 256 4579; GFX11-NEXT: kernel_code_prefetch_byte_size = 0 4580; GFX11-NEXT: granulated_workitem_vgpr_count = 0 4581; GFX11-NEXT: granulated_wavefront_sgpr_count = 0 4582; GFX11-NEXT: priority = 0 4583; GFX11-NEXT: float_mode = 240 4584; GFX11-NEXT: priv = 0 4585; GFX11-NEXT: enable_dx10_clamp = 1 4586; GFX11-NEXT: debug_mode = 0 4587; GFX11-NEXT: enable_ieee_mode = 1 4588; GFX11-NEXT: enable_wgp_mode = 1 4589; GFX11-NEXT: enable_mem_ordered = 1 4590; GFX11-NEXT: enable_fwd_progress = 0 4591; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4592; GFX11-NEXT: user_sgpr_count = 15 4593; GFX11-NEXT: enable_trap_handler = 0 4594; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1 4595; GFX11-NEXT: enable_sgpr_workgroup_id_y = 0 4596; GFX11-NEXT: enable_sgpr_workgroup_id_z = 0 4597; GFX11-NEXT: enable_sgpr_workgroup_info = 0 4598; GFX11-NEXT: enable_vgpr_workitem_id = 0 4599; GFX11-NEXT: enable_exception_msb = 0 4600; GFX11-NEXT: granulated_lds_size = 0 4601; 
GFX11-NEXT: enable_exception = 0 4602; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0 4603; GFX11-NEXT: enable_sgpr_dispatch_ptr = 0 4604; GFX11-NEXT: enable_sgpr_queue_ptr = 0 4605; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4606; GFX11-NEXT: enable_sgpr_dispatch_id = 0 4607; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0 4608; GFX11-NEXT: enable_sgpr_private_segment_size = 0 4609; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4610; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4611; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4612; GFX11-NEXT: enable_wavefront_size32 = 1 4613; GFX11-NEXT: enable_ordered_append_gds = 0 4614; GFX11-NEXT: private_element_size = 1 4615; GFX11-NEXT: is_ptr64 = 1 4616; GFX11-NEXT: is_dynamic_callstack = 0 4617; GFX11-NEXT: is_debug_enabled = 0 4618; GFX11-NEXT: is_xnack_enabled = 0 4619; GFX11-NEXT: workitem_private_segment_byte_size = 0 4620; GFX11-NEXT: workgroup_group_segment_byte_size = 0 4621; GFX11-NEXT: gds_segment_byte_size = 0 4622; GFX11-NEXT: kernarg_segment_byte_size = 12 4623; GFX11-NEXT: workgroup_fbarrier_count = 0 4624; GFX11-NEXT: wavefront_sgpr_count = 7 4625; GFX11-NEXT: workitem_vgpr_count = 3 4626; GFX11-NEXT: reserved_vgpr_first = 0 4627; GFX11-NEXT: reserved_vgpr_count = 0 4628; GFX11-NEXT: reserved_sgpr_first = 0 4629; GFX11-NEXT: reserved_sgpr_count = 0 4630; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4631; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0 4632; GFX11-NEXT: kernarg_segment_alignment = 4 4633; GFX11-NEXT: group_segment_alignment = 4 4634; GFX11-NEXT: private_segment_alignment = 4 4635; GFX11-NEXT: wavefront_size = 5 4636; GFX11-NEXT: call_convention = -1 4637; GFX11-NEXT: runtime_loader_kernel_symbol = 0 4638; GFX11-NEXT: .end_amd_kernel_code_t 4639; GFX11-NEXT: ; %bb.0: ; %entry 4640; GFX11-NEXT: s_clause 0x1 4641; GFX11-NEXT: s_load_b32 s6, s[0:1], 0x8 4642; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 4643; GFX11-NEXT: s_mov_b32 s2, 0 4644; GFX11-NEXT: 
s_mov_b32 s3, 0x40080000
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_cmp_eq_u32 s6, 1
; GFX11-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
; GFX11-NEXT:    s_cmp_eq_u32 s6, 2
; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GFX11-NEXT:    s_cmp_eq_u32 s6, 3
; GFX11-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
entry:
  %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; Constant-index extract of element 7 from a <64 x i32> loaded through an
; addrspace(1) pointer. The expected output loads only the 16-byte quad at
; byte offset 16 (elements 4-7) and returns its last dword (v7). For the fiji
; run the offset is added to the address first and a flat load is expected.
define i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_7:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v7
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_7:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dwordx4 v[4:7], v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    v_mov_b32_e32 v0, v7
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_7:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v7
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_7:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b128 v[4:7], v[0:1], off offset:16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, v7
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %wide = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %lane = extractelement <64 x i32> %wide, i32 7
  ret i32 %lane
}

; Constant-index extract of element 32. The expected quad load sits at byte
; offset 128 and targets v[0:3], so the wanted dword is already in v0 and no
; copy follows the load. For the fiji run the 0x80 offset is expected to be
; materialized in an SGPR pair and copied to VGPRs ahead of the 64-bit add.
define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_32:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_32:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    s_mov_b64 s[4:5], 0x80
; MOVREL-NEXT:    v_mov_b32_e32 v2, s4
; MOVREL-NEXT:    v_mov_b32_e32 v3, s5
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; MOVREL-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %wide = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %lane = extractelement <64 x i32> %wide, i32 32
  ret i32 %lane
}

; Constant-index extract of element 33. The expected output uses the same
; quad load at byte offset 128 as the element-32 test, followed by a copy of
; the second loaded dword (v1) into v0.
define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_33:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_33:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    s_mov_b64 s[4:5], 0x80
; MOVREL-NEXT:    v_mov_b32_e32 v2, s4
; MOVREL-NEXT:    v_mov_b32_e32 v3, s5
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; MOVREL-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    v_mov_b32_e32 v0, v1
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_33:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %wide = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %lane = extractelement <64 x i32> %wide, i32 33
  ret i32 %lane
}

; Constant-index extract of element 37. The expected quad load sits at byte
; offset 144 (elements 36-39) and the second loaded dword (v5) is returned.
; For the fiji run the offset appears as the literal 0x90 in the address add.
define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_37:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:144
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v5
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_37:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x90, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dwordx4 v[4:7], v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    v_mov_b32_e32 v0, v5
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_37:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:144
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_37:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b128 v[4:7], v[0:1], off offset:144
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, v5
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %wide = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %lane = extractelement <64 x i32> %wide, i32 37
  ret i32 %lane
}