1; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s 2 3; GCN-LABEL: {{^}}float4_extelt: 4; GCN-NOT: buffer_ 5; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 6; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 7; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2 8; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 9; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 10; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 11; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]] 12; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2.0, [[V1]], [[C2]] 13; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4.0, [[V2]], [[C3]] 14; GCN: store_dword v[{{[0-9:]+}}], [[V3]] 15define amdgpu_kernel void @float4_extelt(float addrspace(1)* %out, i32 %sel) { 16entry: 17 %ext = extractelement <4 x float> <float 0.0, float 1.0, float 2.0, float 4.0>, i32 %sel 18 store float %ext, float addrspace(1)* %out 19 ret void 20} 21 22; GCN-LABEL: {{^}}int4_extelt: 23; GCN-NOT: buffer_ 24; GCN-DAG: s_cmp_lg_u32 [[IDX:s[0-9]+]], 2 25; GCN-DAG: s_cmp_eq_u32 [[IDX]], 1 26; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 27; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 28; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1, [[C1]] 29; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2, [[V1]], vcc 30; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4, [[V2]], vcc 31; GCN: store_dword v[{{[0-9:]+}}], [[V3]] 32define amdgpu_kernel void @int4_extelt(i32 addrspace(1)* %out, i32 %sel) { 33entry: 34 %ext = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 4>, i32 %sel 35 store i32 %ext, i32 addrspace(1)* %out 36 ret void 37} 38 39; GCN-LABEL: {{^}}double4_extelt: 40; GCN-NOT: buffer_ 41; GCN-DAG: s_mov_b32 s[[L0LO:[0-9]+]], 0x47ae147b 42; GCN-DAG: s_mov_b32 s[[L0HI:[0-9]+]], 0x3f847ae1 43; GCN-DAG: s_mov_b32 s[[L1LO:[0-9]+]], 0xc28f5c29 44; GCN-DAG: s_mov_b32 s[[L1HI:[0-9]+]], 0x3ff028f5 45; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 46; GCN: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s{{\[}}[[L1LO]]:[[L1HI]]{{\]}}, s{{\[}}[[L0LO]]:[[L0HI]]{{\]}} 47; GCN-DAG: s_mov_b32 s[[L2LO:[0-9]+]], 0xe147ae14 48; GCN-DAG: s_mov_b32 s[[L2HI:[0-9]+]], 0x4000147a 49; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2 50; GCN: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s{{\[}}[[T0LO]]:[[T0HI]]{{\]}}, s{{\[}}[[L2LO]]:[[L2HI]]{{\]}} 51; GCN-DAG: s_mov_b32 s[[L3LO:[0-9]+]], 0x70a3d70a 52; GCN-DAG: s_mov_b32 s[[L3HI:[0-9]+]], 0x40100a3d 53; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 54; GCN: s_cselect_b64 s{{\[}}[[T2LO:[0-9]+]]:[[T2HI:[0-9]+]]{{\]}}, s{{\[}}[[T1LO]]:[[T1HI]]{{\]}}, s{{\[}}[[L3LO]]:[[L3HI]]{{\]}} 55; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T2LO]] 56; GCN-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T2HI]] 57; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} 58define amdgpu_kernel void @double4_extelt(double addrspace(1)* %out, i32 %sel) { 59entry: 60 %ext = extractelement <4 x double> <double 0.01, double 1.01, double 2.01, double 4.01>, i32 %sel 61 store double %ext, double addrspace(1)* %out 62 ret void 63} 64 65; GCN-LABEL: {{^}}double5_extelt: 66; GCN-NOT: buffer_ 67; GCN-DAG: s_mov_b32 s[[L0LO:[0-9]+]], 0x47ae147b 68; GCN-DAG: s_mov_b32 s[[L0HI:[0-9]+]], 0x3f847ae1 69; GCN-DAG: s_mov_b32 s[[L1LO:[0-9]+]], 0xc28f5c29 70; GCN-DAG: s_mov_b32 s[[L1HI:[0-9]+]], 0x3ff028f5 71; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 72; GCN: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s{{\[}}[[L1LO]]:[[L1HI]]{{\]}}, s{{\[}}[[L0LO]]:[[L0HI]]{{\]}} 73; GCN-DAG: s_mov_b32 s[[L2LO:[0-9]+]], 0xe147ae14 74; GCN-DAG: s_mov_b32 s[[L2HI:[0-9]+]], 0x4000147a 75; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2 76; GCN: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s{{\[}}[[T0LO]]:[[T0HI]]{{\]}}, s{{\[}}[[L2LO]]:[[L2HI]]{{\]}} 77; GCN-DAG: s_mov_b32 s[[L3LO:[0-9]+]], 0x70a3d70a 78; GCN-DAG: s_mov_b32 s[[L3HI:[0-9]+]], 0x40100a3d 79; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 80; GCN: s_cselect_b64 s{{\[}}[[T2LO:[0-9]+]]:[[T2HI:[0-9]+]]{{\]}}, s{{\[}}[[T1LO]]:[[T1HI]]{{\]}}, s{{\[}}[[L3LO]]:[[L3HI]]{{\]}} 81; Double literals 5.01 and 4.01 share the same low 32 bits. 82; GCN-DAG: s_mov_b32 s[[L4HI:[0-9]+]], 0x40140a3d 83; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4 84; GCN: s_cselect_b64 s{{\[}}[[T3LO:[0-9]+]]:[[T3HI:[0-9]+]]{{\]}}, s{{\[}}[[T2LO]]:[[T2HI]]{{\]}}, s{{\[}}[[L3LO]]:[[L4HI]]{{\]}} 85; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T3LO]] 86; GCN-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T3HI]] 87; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} 88define amdgpu_kernel void @double5_extelt(double addrspace(1)* %out, i32 %sel) { 89entry: 90 %ext = extractelement <5 x double> <double 0.01, double 1.01, double 2.01, double 4.01, double 5.01>, i32 %sel 91 store double %ext, double addrspace(1)* %out 92 ret void 93} 94 95; GCN-LABEL: {{^}}half4_extelt: 96; GCN-NOT: buffer_ 97; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00 98; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200 99; GCN-DAG: s_lshl_b32 [[SEL:s[0-p]+]], s{{[0-9]+}}, 4 100; GCN: s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]] 101; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]] 102; GCN: store_short v[{{[0-9:]+}}], v[[VRL]] 103define amdgpu_kernel void @half4_extelt(half addrspace(1)* %out, i32 %sel) { 104entry: 105 %ext = extractelement <4 x half> <half 1.0, half 2.0, half 3.0, half 4.0>, i32 %sel 106 store half %ext, half addrspace(1)* %out 107 ret void 108} 109 110; GCN-LABEL: {{^}}float2_extelt: 111; GCN-NOT: buffer_ 112; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 113; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 114; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]] 115; GCN: store_dword v[{{[0-9:]+}}], [[V1]] 116define amdgpu_kernel void @float2_extelt(float addrspace(1)* %out, i32 %sel) { 117entry: 118 %ext = extractelement <2 x float> <float 0.0, float 1.0>, i32 %sel 119 store float %ext, float addrspace(1)* %out 120 ret void 121} 122 123; GCN-LABEL: {{^}}double2_extelt: 124; GCN-NOT: buffer_ 125; GCN-DAG: s_mov_b32 s[[L0LO:[0-9]+]], 0x47ae147b 126; GCN-DAG: s_mov_b32 s[[L0HI:[0-9]+]], 0x3f847ae1 127; GCN-DAG: s_mov_b32 s[[L1LO:[0-9]+]], 0xc28f5c29 128; GCN-DAG: s_mov_b32 s[[L1HI:[0-9]+]], 0x3ff028f5 129; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 130; GCN: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s{{\[}}[[L1LO]]:[[L1HI]]{{\]}}, s{{\[}}[[L0LO]]:[[L0HI]]{{\]}} 131; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T0LO]] 132; GCN-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T0HI]] 133; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} 134define amdgpu_kernel void @double2_extelt(double addrspace(1)* %out, i32 %sel) { 135entry: 136 %ext = extractelement <2 x double> <double 0.01, double 1.01>, i32 %sel 137 store double %ext, double addrspace(1)* %out 138 ret void 139} 140 141; GCN-LABEL: {{^}}half8_extelt: 142; GCN-NOT: buffer_ 143; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 144; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 145; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2 146; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 147; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 148; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 149; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4 150; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0 151; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5 152; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0 153; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6 154; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0 155; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7 156; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0 157; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]] 158; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]] 159; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]] 160; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]] 161; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]] 162; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]] 163; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]] 164; GCN: store_short v[{{[0-9:]+}}], [[V7]] 165define amdgpu_kernel void @half8_extelt(half addrspace(1)* %out, i32 %sel) { 166entry: 167 %ext = extractelement <8 x half> <half 1.0, half 2.0, half 3.0, half 4.0, half 5.0, half 6.0, half 7.0, half 8.0>, i32 %sel 168 store half %ext, half addrspace(1)* %out 169 ret void 170} 171 172; GCN-LABEL: {{^}}short8_extelt: 173; GCN-NOT: buffer_ 174; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 175; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 176; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2 177; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 178; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 179; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 180; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4 181; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0 182; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5 183; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0 184; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6 185; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0 186; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7 187; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0 188; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]] 189; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]] 190; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]] 191; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]] 192; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]] 193; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]] 194; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]] 195; GCN: store_short v[{{[0-9:]+}}], [[V7]] 196define amdgpu_kernel void @short8_extelt(i16 addrspace(1)* %out, i32 %sel) { 197entry: 198 %ext = extractelement <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i32 %sel 199 store i16 %ext, i16 addrspace(1)* %out 200 ret void 201} 202 203; GCN-LABEL: {{^}}float8_extelt: 204; GCN-NOT: buffer_ 205; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 206; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 207; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2 208; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 209; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 210; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 211; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4 212; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0 213; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5 214; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0 215; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6 216; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0 217; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7 218; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0 219; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]] 220; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]] 221; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]] 222; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]] 223; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]] 224; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]] 225; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]] 226; GCN: store_dword v[{{[0-9:]+}}], [[V7]] 227define amdgpu_kernel void @float8_extelt(float addrspace(1)* %out, i32 %sel) { 228entry: 229 %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel 230 store float %ext, float addrspace(1)* %out 231 ret void 232} 233 234; GCN-LABEL: {{^}}double8_extelt: 235; GCN-NOT: buffer_ 236; GCN-NOT: s_or_b32 237; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}} 238; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]] 239; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] 240; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]] 241; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]] 242; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]] 243define amdgpu_kernel void @double8_extelt(double addrspace(1)* %out, i32 %sel) { 244entry: 245 %ext = extractelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, i32 %sel 246 store double %ext, double addrspace(1)* %out 247 ret void 248} 249 250; GCN-LABEL: {{^}}double7_extelt: 251; GCN-NOT: buffer_ 252; GCN-NOT: s_or_b32 253; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}} 254; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]] 255; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] 256; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]] 257; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]] 258; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]] 259define amdgpu_kernel void @double7_extelt(double addrspace(1)* %out, i32 %sel) { 260entry: 261 %ext = extractelement <7 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, i32 %sel 262 store double %ext, double addrspace(1)* %out 263 ret void 264} 265 266; GCN-LABEL: {{^}}float16_extelt: 267; GCN-NOT: buffer_ 268; GCN-DAG: s_mov_b32 m0, 269; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0 270; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 271; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000 272; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0 273; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000 274; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000 275; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000 276; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000 277; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000 278; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000 279; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000 280; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000 281; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000 282; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000 283; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000 284; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000 285; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]] 286; GCN: store_dword v[{{[0-9:]+}}], [[RES]] 287define amdgpu_kernel void @float16_extelt(float addrspace(1)* %out, i32 %sel) { 288entry: 289 %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel 290 store float %ext, float addrspace(1)* %out 291 ret void 292} 293 294; GCN-LABEL: {{^}}double15_extelt: 295; GCN-NOT: buffer_ 296; GCN-NOT: s_or_b32 297; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}} 298; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]] 299; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] 300; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]] 301; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]] 302; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]] 303define amdgpu_kernel void @double15_extelt(double addrspace(1)* %out, i32 %sel) { 304entry: 305 %ext = extractelement <15 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>, i32 %sel 306 store double %ext, double addrspace(1)* %out 307 ret void 308} 309 310; GCN-LABEL: {{^}}double16_extelt: 311; GCN-NOT: buffer_ 312; GCN-NOT: s_or_b32 313; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}} 314; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]] 315; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] 316; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]] 317; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]] 318; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]] 319define amdgpu_kernel void @double16_extelt(double addrspace(1)* %out, i32 %sel) { 320entry: 321 %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel 322 store double %ext, double addrspace(1)* %out 323 ret void 324} 325 326; GCN-LABEL: {{^}}float32_extelt: 327; GCN-NOT: buffer_ 328; GCN-DAG: s_mov_b32 m0, 329; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0 330; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 331; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000 332; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0 333; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000 334; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000 335; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000 336; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000 337; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000 338; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000 339; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000 340; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000 341; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000 342; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000 343; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000 344; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000 345; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41880000 346; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000 347; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41980000 348; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000 349; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000 350; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000 351; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b80000 352; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c00000 353; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c80000 354; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d00000 355; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d80000 356; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e00000 357; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e80000 358; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f00000 359; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f80000 360; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x42000000 361; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]] 362; GCN: store_dword v[{{[0-9:]+}}], [[RES]] 363define amdgpu_kernel void @float32_extelt(float addrspace(1)* %out, i32 %sel) { 364entry: 365 %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel 366 store float %ext, float addrspace(1)* %out 367 ret void 368} 369 370; GCN-LABEL: {{^}}byte8_extelt: 371; GCN-NOT: buffer_ 372; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x4030201 373; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x8070605 374; GCN-DAG: s_lshl_b32 [[SEL:s[0-p]+]], s{{[0-9]+}}, 3 375; GCN: s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]] 376; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]] 377; GCN: store_byte v[{{[0-9:]+}}], v[[VRL]] 378define amdgpu_kernel void @byte8_extelt(i8 addrspace(1)* %out, i32 %sel) { 379entry: 380 %ext = extractelement <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i32 %sel 381 store i8 %ext, i8 addrspace(1)* %out 382 ret void 383} 384 385; GCN-LABEL: {{^}}byte16_extelt: 386; GCN-NOT: buffer_ 387; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 388; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 389; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2 390; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 391; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 392; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 393; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4 394; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0 395; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5 396; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0 397; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6 398; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0 399; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7 400; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0 401; GCN-DAG: s_cmp_lg_u32 [[IDX]], 8 402; GCN-DAG: s_cselect_b64 [[C8:[^,]+]], -1, 0 403; GCN-DAG: s_cmp_lg_u32 [[IDX]], 9 404; GCN-DAG: s_cselect_b64 [[C9:[^,]+]], -1, 0 405; GCN-DAG: s_cmp_lg_u32 [[IDX]], 10 406; GCN-DAG: s_cselect_b64 [[C10:[^,]+]], -1, 0 407; GCN-DAG: s_cmp_lg_u32 [[IDX]], 11 408; GCN-DAG: s_cselect_b64 [[C11:[^,]+]], -1, 0 409; GCN-DAG: s_cmp_lg_u32 [[IDX]], 12 410; GCN-DAG: s_cselect_b64 [[C12:[^,]+]], -1, 0 411; GCN-DAG: s_cmp_lg_u32 [[IDX]], 13 412; GCN-DAG: s_cselect_b64 [[C13:[^,]+]], -1, 0 413; GCN-DAG: s_cmp_lg_u32 [[IDX]], 14 414; GCN-DAG: s_cselect_b64 [[C14:[^,]+]], -1, 0 415; GCN-DAG: s_cmp_lg_u32 [[IDX]], 15 416; GCN-DAG: s_cselect_b64 [[C15:[^,]+]], -1, 0 417; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]] 418; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]] 419; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]] 420; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]] 421; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]] 422; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]] 423; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]] 424; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V8:v[0-9]+]], {{[^,]+}}, [[V7]], [[C8]] 425; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V9:v[0-9]+]], {{[^,]+}}, [[V8]], [[C8]] 426; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V10:v[0-9]+]], {{[^,]+}}, [[V9]], [[C10]] 427; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V11:v[0-9]+]], {{[^,]+}}, [[V10]], [[C11]] 428; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V12:v[0-9]+]], {{[^,]+}}, [[V11]], [[C12]] 429; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V13:v[0-9]+]], {{[^,]+}}, [[V12]], [[C13]] 430; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V14:v[0-9]+]], {{[^,]+}}, [[V13]], [[C14]] 431; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V15:v[0-9]+]], {{[^,]+}}, [[V14]], [[C15]] 432; GCN: store_byte v[{{[0-9:]+}}], [[V15]] 433define amdgpu_kernel void @byte16_extelt(i8 addrspace(1)* %out, i32 %sel) { 434entry: 435 %ext = extractelement <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, i32 %sel 436 store i8 %ext, i8 addrspace(1)* %out 437 ret void 438} 439 440; GCN-LABEL: {{^}}bit4_extelt: 441; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 442; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 443; GCN-DAG: buffer_store_byte [[ZERO]], 444; GCN-DAG: buffer_store_byte [[ONE]], 445; GCN-DAG: buffer_store_byte [[ZERO]], 446; GCN-DAG: buffer_store_byte [[ONE]], 447; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]], 448; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[LOAD]] 449; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]] 450define amdgpu_kernel void @bit4_extelt(i32 addrspace(1)* %out, i32 %sel) { 451entry: 452 %ext = extractelement <4 x i1> <i1 0, i1 1, i1 0, i1 1>, i32 %sel 453 %zext = zext i1 %ext to i32 454 store i32 %zext, i32 addrspace(1)* %out 455 ret void 456} 457 458; GCN-LABEL: {{^}}bit128_extelt: 459; GCN-NOT: buffer_ 460; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1 461; GCN: s_cmpk_lg_i32 {{s[0-9]+}}, 0x7f 462; GCN: s_cselect_b64 [[CL:[^,]+]], -1, 0 463; GCN: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, [[V1]], [[CL]] 464; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[VL]] 465; GCN: store_dword v[{{[0-9:]+}}], [[RES]] 466define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) { 467entry: 468 %ext = extractelement <128 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, i32 %sel 469 %zext = zext i1 %ext to i32 470 store i32 %zext, i32 addrspace(1)* %out 471 ret void 472} 473 474; GCN-LABEL: {{^}}float32_extelt_vec: 475; GCN-NOT: buffer_ 476; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0 477; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 1.0, 2.0, [[CC1]] 478; GCN-DAG: v_mov_b32_e32 [[LASTVAL:v[0-9]+]], 0x42000000 479; GCN-DAG: v_cmp_ne_u32_e32 [[LASTCC:[^,]+]], 31, v0 480; GCN-DAG: v_cndmask_b32_e{{32|64}} v0, [[LASTVAL]], v{{[0-9]+}}, [[LASTCC]] 481define float @float32_extelt_vec(i32 %sel) { 482entry: 483 %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel 484 ret float %ext 485} 486 487; GCN-LABEL: {{^}}double16_extelt_vec: 488; GCN-NOT: buffer_ 489; GCN-DAG: v_mov_b32_e32 [[V1HI:v[0-9]+]], 0x3ff19999 490; GCN-DAG: v_mov_b32_e32 [[V1LO:v[0-9]+]], 0x9999999a 491; GCN-DAG: v_mov_b32_e32 [[V2HI:v[0-9]+]], 0x4000cccc 492; GCN-DAG: v_mov_b32_e32 [[V2LO:v[0-9]+]], 0xcccccccd 493; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0 494; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1HI:v[0-9]+]], [[V1HI]], [[V2HI]], [[CC1]] 495; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1LO:v[0-9]+]], [[V1LO]], [[V2LO]], [[CC1]] 496define double @double16_extelt_vec(i32 %sel) { 497entry: 498 %ext = extractelement <16 x double> <double 1.1, double 2.1, double 3.1, double 4.1, double 5.1, double 6.1, double 7.1, double 8.1, double 9.1, double 10.1, double 11.1, double 12.1, double 13.1, double 14.1, double 15.1, double 16.1>, i32 %sel 499 ret double %ext 500} 501