; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s

; Code generation tests for extractelement with a run-time index (%sel) on
; constant vectors. Depending on element type and vector length, the expected
; output checked below takes one of these shapes:
;   * a compare / select chain (s_cmp_*, s_cselect_b64, v_cndmask_b32; the two
;     non-kernel functions at the end compare v0 with v_cmp_* instead),
;   * a 64-bit right shift of the packed constant (s_lshl_b32 + s_lshr_b64),
;   * relative register indexing through m0 (v_movrels_b32), or
;   * a round trip through buffer_store_byte / buffer_load_ubyte (bit4_extelt).
; Most tests additionally forbid any buffer_ instruction.

; <4 x float>: three-step select chain; the final select feeds the store.
; GCN-LABEL: {{^}}float4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2.0, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4.0, [[V2]], [[C3]]
; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
define amdgpu_kernel void @float4_extelt(float addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <4 x float> <float 0.0, float 1.0, float 2.0, float 4.0>, i32 %sel
  store float %ext, float addrspace(1)* %out
  ret void
}

; <4 x i32>: same select chain as the float case, but the last two selects are
; expected to take their condition from vcc.
; GCN-LABEL: {{^}}int4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_lg_u32 [[IDX:s[0-9]+]], 2
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2, [[V1]], vcc
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4, [[V2]], vcc
; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
define amdgpu_kernel void @int4_extelt(i32 addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 4>, i32 %sel
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; <4 x double>: one equality compare per index 1..3; each condition must be
; consumed by a 32-bit select. The result is stored as a dword pair.
; GCN-LABEL: {{^}}double4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double4_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <4 x double> <double 0.01, double 1.01, double 2.01, double 4.01>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <5 x double>: as double4_extelt, with a fourth compare/select for index 4.
; GCN-LABEL: {{^}}double5_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C4]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double5_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <5 x double> <double 0.01, double 1.01, double 2.01, double 4.01, double 5.01>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <4 x half>: the four 16-bit constants are packed into a 64-bit scalar pair
; (0x3c00/0x4000 low, 0x4200/0x4400 high); the index is shifted left by 4
; (i.e. multiplied by 16) and used as the right-shift amount.
; GCN-LABEL: {{^}}half4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200
; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshr_b64 s[[[RL:[0-9]+]]:{{[0-9]+}}], s[[[SL]]:[[SH]]], [[SEL]]
; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
; GCN: store_short v[{{[0-9:]+}}], v[[VRL]]
define amdgpu_kernel void @half4_extelt(half addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <4 x half> <half 1.0, half 2.0, half 3.0, half 4.0>, i32 %sel
  store half %ext, half addrspace(1)* %out
  ret void
}

; <2 x float>: a single compare and a single select.
; GCN-LABEL: {{^}}float2_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
; GCN: store_dword v[{{[0-9:]+}}], [[V1]]
define amdgpu_kernel void @float2_extelt(float addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <2 x float> <float 0.0, float 1.0>, i32 %sel
  store float %ext, float addrspace(1)* %out
  ret void
}

; <2 x double>: one compare; the same condition drives two 32-bit selects
; (one per half of the 64-bit result).
; GCN-LABEL: {{^}}double2_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double2_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <2 x double> <double 0.01, double 1.01>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <8 x half>: seven-step select chain; select N takes select N-1 as an input
; and condition N as its mask.
; GCN-LABEL: {{^}}half8_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
; GCN: store_short v[{{[0-9:]+}}], [[V7]]
define amdgpu_kernel void @half8_extelt(half addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <8 x half> <half 1.0, half 2.0, half 3.0, half 4.0, half 5.0, half 6.0, half 7.0, half 8.0>, i32 %sel
  store half %ext, half addrspace(1)* %out
  ret void
}

; <8 x i16>: same seven-step select chain as half8_extelt.
; GCN-LABEL: {{^}}short8_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
; GCN: store_short v[{{[0-9:]+}}], [[V7]]
define amdgpu_kernel void @short8_extelt(i16 addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i32 %sel
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; <8 x float>: all eight constants are materialized in VGPRs, the loaded index
; is copied to m0, and the element is read with v_movrels_b32.
; GCN-LABEL: {{^}}float8_extelt:
; GCN-DAG: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-DAG: s_load_dword [[S0:s[0-9]+]], s[0:1], 0x2c
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: s_waitcnt lgkmcnt(0)
; GCN-DAG: s_mov_b32 m0, [[S0]]
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], v{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @float8_extelt(float addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  store float %ext, float addrspace(1)* %out
  ret void
}

; <8 x double>: two v_movrels_b32 reads from consecutive base registers
; (BASE and BASE+1) produce the low and high halves; no s_or_b32 is allowed.
; GCN-LABEL: {{^}}double8_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double8_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <7 x double>: same expectations as double8_extelt for an odd vector length.
; GCN-LABEL: {{^}}double7_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double7_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <7 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <16 x float>: sixteen constants in VGPRs, then v_movrels_b32 relative to the
; register holding element 0 (VLO).
; GCN-LABEL: {{^}}float16_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 m0,
; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000
; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @float16_extelt(float addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
  store float %ext, float addrspace(1)* %out
  ret void
}

; <15 x double>: same expectations as double8_extelt.
; GCN-LABEL: {{^}}double15_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double15_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <15 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <16 x double>: same expectations as double8_extelt.
; GCN-LABEL: {{^}}double16_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double16_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <32 x float> in a kernel: thirty-two constants in VGPRs, then v_movrels_b32
; relative to the register holding element 0 (VLO).
; GCN-LABEL: {{^}}float32_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 m0,
; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41880000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41980000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x42000000
; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @float32_extelt(float addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
  store float %ext, float addrspace(1)* %out
  ret void
}

; <8 x i8>: the eight bytes are packed into a 64-bit scalar pair; the index is
; shifted left by 3 (i.e. multiplied by 8) and used as the right-shift amount.
; GCN-LABEL: {{^}}byte8_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x4030201
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x8070605
; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 3
; GCN: s_lshr_b64 s[[[RL:[0-9]+]]:{{[0-9]+}}], s[[[SL]]:[[SH]]], [[SEL]]
; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
; GCN: store_byte v[{{[0-9:]+}}], v[[VRL]]
define amdgpu_kernel void @byte8_extelt(i8 addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i32 %sel
  store i8 %ext, i8 addrspace(1)* %out
  ret void
}

; <16 x i8>: fifteen-step select chain. Select N must take select N-1 as an
; input and condition N (from the compare against N) as its mask.
; GCN-LABEL: {{^}}byte16_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 8
; GCN-DAG: s_cselect_b64 [[C8:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 9
; GCN-DAG: s_cselect_b64 [[C9:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 10
; GCN-DAG: s_cselect_b64 [[C10:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 11
; GCN-DAG: s_cselect_b64 [[C11:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 12
; GCN-DAG: s_cselect_b64 [[C12:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 13
; GCN-DAG: s_cselect_b64 [[C13:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 14
; GCN-DAG: s_cselect_b64 [[C14:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 15
; GCN-DAG: s_cselect_b64 [[C15:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V8:v[0-9]+]], {{[^,]+}}, [[V7]], [[C8]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V9:v[0-9]+]], {{[^,]+}}, [[V8]], [[C9]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V10:v[0-9]+]], {{[^,]+}}, [[V9]], [[C10]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V11:v[0-9]+]], {{[^,]+}}, [[V10]], [[C11]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V12:v[0-9]+]], {{[^,]+}}, [[V11]], [[C12]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V13:v[0-9]+]], {{[^,]+}}, [[V12]], [[C13]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V14:v[0-9]+]], {{[^,]+}}, [[V13]], [[C14]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V15:v[0-9]+]], {{[^,]+}}, [[V14]], [[C15]]
; GCN: store_byte v[{{[0-9:]+}}], [[V15]]
define amdgpu_kernel void @byte16_extelt(i8 addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, i32 %sel
  store i8 %ext, i8 addrspace(1)* %out
  ret void
}

; <4 x i1>: unlike the other tests, buffer_ instructions are expected here: the
; four bits are written with buffer_store_byte, the selected one is read back
; with buffer_load_ubyte and masked to a single bit.
; GCN-LABEL: {{^}}bit4_extelt:
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN-DAG: buffer_store_byte [[ZERO]],
; GCN-DAG: buffer_store_byte [[ONE]],
; GCN-DAG: buffer_store_byte [[ZERO]],
; GCN-DAG: buffer_store_byte [[ONE]],
; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[LOAD]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @bit4_extelt(i32 addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <4 x i1> <i1 0, i1 1, i1 0, i1 1>, i32 %sel
  %zext = zext i1 %ext to i32
  store i32 %zext, i32 addrspace(1)* %out
  ret void
}

; <128 x i1>: only the first select and the last compare/select (index 0x7f)
; of the chain are checked, followed by the mask to a single bit.
; GCN-LABEL: {{^}}bit128_extelt:
; GCN-NOT: buffer_
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1
; GCN: s_cmpk_lg_i32 {{s[0-9]+}}, 0x7f
; GCN: s_cselect_b64 [[CL:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, [[V1]], [[CL]]
; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[VL]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) {
entry:
  ; 128 alternating bits, starting with 1 (16 elements per row).
  %ext = extractelement <128 x i1> <
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, i32 %sel
  %zext = zext i1 %ext to i32
  store i32 %zext, i32 addrspace(1)* %out
  ret void
}

; <32 x float> in a non-kernel function: v0 is compared with v_cmp_* and the
; result is selected into v0. Only the first (index 1) and last (index 31)
; links of the chain are checked.
; GCN-LABEL: {{^}}float32_extelt_vec:
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 1.0, 2.0, [[CC1]]
; GCN-DAG: v_mov_b32_e32 [[LASTVAL:v[0-9]+]], 0x42000000
; GCN-DAG: v_cmp_ne_u32_e32 [[LASTCC:[^,]+]], 31, v0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v0, [[LASTVAL]], v{{[0-9]+}}, [[LASTCC]]
define float @float32_extelt_vec(i32 %sel) {
entry:
  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
  ret float %ext
}

; <16 x double> in a non-kernel function: only the first link is checked. The
; high and low dwords of 1.1 and 2.1 are materialized separately and selected
; under the same v0 == 1 condition.
; GCN-LABEL: {{^}}double16_extelt_vec:
; GCN-NOT: buffer_
; GCN-DAG: v_mov_b32_e32 [[V1HI:v[0-9]+]], 0x3ff19999
; GCN-DAG: v_mov_b32_e32 [[V1LO:v[0-9]+]], 0x9999999a
; GCN-DAG: v_mov_b32_e32 [[V2HI:v[0-9]+]], 0x4000cccc
; GCN-DAG: v_mov_b32_e32 [[V2LO:v[0-9]+]], 0xcccccccd
; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1HI:v[0-9]+]], [[V1HI]], [[V2HI]], [[CC1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1LO:v[0-9]+]], [[V1LO]], [[V2LO]], [[CC1]]
define double @double16_extelt_vec(i32 %sel) {
entry:
  %ext = extractelement <16 x double> <double 1.1, double 2.1, double 3.1, double 4.1, double 5.1, double 6.1, double 7.1, double 8.1, double 9.1, double 10.1, double 11.1, double 12.1, double 13.1, double 14.1, double 15.1, double 16.1>, i32 %sel
  ret double %ext
}