; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s

; Code generation checks for extractelement with a dynamic (runtime) index
; into constant vectors of various element types and lengths.
;
; Each test extracts element %sel from a constant vector and either stores the
; result through %out (kernels) or returns it (the *_vec functions). The check
; lines pin the instruction pattern expected for each vector shape:
;   * a compare/select chain (s_cmp + s_cselect + v_cndmask),
;   * a 64-bit scalar shift of a packed constant (s_lshl + s_lshr_b64),
;   * relative register indexing through m0 (v_movrels), or
;   * byte stores followed by a byte load (bit4_extelt only).
; Apart from bit4_extelt, every test also rejects any buffer_ instruction.

; <4 x float>: expects a three-step select chain keyed on the index.
; GCN-LABEL: {{^}}float4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2.0, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4.0, [[V2]], [[C3]]
; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
define amdgpu_kernel void @float4_extelt(float addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <4 x float> <float 0.0, float 1.0, float 2.0, float 4.0>, i32 %sel
  store float %ext, float addrspace(1)* %out
  ret void
}

; <4 x i32>: same select chain as the float case; the last two selects are
; expected to take their condition from vcc.
; GCN-LABEL: {{^}}int4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_lg_u32 [[IDX:s[0-9]+]], 2
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2, [[V1]], vcc
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4, [[V2]], vcc
; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
define amdgpu_kernel void @int4_extelt(i32 addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 4>, i32 %sel
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; <4 x double>: one equality compare per non-zero index, each feeding a select;
; the result is stored as a 64-bit value.
; GCN-LABEL: {{^}}double4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double4_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <4 x double> <double 0.01, double 1.01, double 2.01, double 4.01>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <5 x double>: as double4_extelt, with one more compare/select for index 4.
; GCN-LABEL: {{^}}double5_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C4]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double5_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <5 x double> <double 0.01, double 1.01, double 2.01, double 4.01, double 5.01>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <4 x half>: the whole vector is expected as a 64-bit scalar constant that is
; shifted right by (index << 4) bits; the low dword of the result is stored.
; GCN-LABEL: {{^}}half4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200
; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]]
; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
; GCN: store_short v[{{[0-9:]+}}], v[[VRL]]
define amdgpu_kernel void @half4_extelt(half addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <4 x half> <half 1.0, half 2.0, half 3.0, half 4.0>, i32 %sel
  store half %ext, half addrspace(1)* %out
  ret void
}

; <2 x float>: a single compare/select pair.
; GCN-LABEL: {{^}}float2_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
; GCN: store_dword v[{{[0-9:]+}}], [[V1]]
define amdgpu_kernel void @float2_extelt(float addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <2 x float> <float 0.0, float 1.0>, i32 %sel
  store float %ext, float addrspace(1)* %out
  ret void
}

; <2 x double>: one compare whose condition drives two selects (one per dword
; of the 64-bit result).
; GCN-LABEL: {{^}}double2_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double2_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <2 x double> <double 0.01, double 1.01>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <8 x half>: seven-step select chain, each step consuming the previous result.
; GCN-LABEL: {{^}}half8_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
; GCN: store_short v[{{[0-9:]+}}], [[V7]]
define amdgpu_kernel void @half8_extelt(half addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <8 x half> <half 1.0, half 2.0, half 3.0, half 4.0, half 5.0, half 6.0, half 7.0, half 8.0>, i32 %sel
  store half %ext, half addrspace(1)* %out
  ret void
}

; <8 x i16>: same seven-step select chain as half8_extelt.
; GCN-LABEL: {{^}}short8_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
; GCN: store_short v[{{[0-9:]+}}], [[V7]]
define amdgpu_kernel void @short8_extelt(i16 addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i32 %sel
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; <8 x float>: seven-step select chain, stored as a dword.
; GCN-LABEL: {{^}}float8_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
; GCN: store_dword v[{{[0-9:]+}}], [[V7]]
define amdgpu_kernel void @float8_extelt(float addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  store float %ext, float addrspace(1)* %out
  ret void
}

; <8 x double>: expects m0-relative register reads (v_movrels) of the low and
; high dwords from consecutive VGPRs, with no s_or_b32 on the index.
; GCN-LABEL: {{^}}double8_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double8_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <7 x double>: same v_movrels pattern as double8_extelt.
; GCN-LABEL: {{^}}double7_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double7_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <7 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <16 x float>: all sixteen constants are expected to be materialized in VGPRs
; and the result read with a single v_movrels from the first one.
; GCN-LABEL: {{^}}float16_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 m0,
; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000
; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @float16_extelt(float addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
  store float %ext, float addrspace(1)* %out
  ret void
}

; <15 x double>: same v_movrels pattern as double8_extelt.
; GCN-LABEL: {{^}}double15_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double15_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <15 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <16 x double>: same v_movrels pattern as double8_extelt.
; GCN-LABEL: {{^}}double16_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double16_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; <32 x float>: all thirty-two constants materialized in VGPRs, then a single
; v_movrels from the first one.
; GCN-LABEL: {{^}}float32_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 m0,
; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41880000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41980000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x42000000
; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @float32_extelt(float addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
  store float %ext, float addrspace(1)* %out
  ret void
}

; <8 x i8>: the whole vector is expected as a 64-bit scalar constant that is
; shifted right by (index << 3) bits; the low dword of the result is stored.
; GCN-LABEL: {{^}}byte8_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x4030201
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x8070605
; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 3
; GCN: s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]]
; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
; GCN: store_byte v[{{[0-9:]+}}], v[[VRL]]
define amdgpu_kernel void @byte8_extelt(i8 addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i32 %sel
  store i8 %ext, i8 addrspace(1)* %out
  ret void
}

; <16 x i8>: fifteen-step select chain; step N consumes the result of step N-1
; and the condition produced for index N.
; GCN-LABEL: {{^}}byte16_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 8
; GCN-DAG: s_cselect_b64 [[C8:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 9
; GCN-DAG: s_cselect_b64 [[C9:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 10
; GCN-DAG: s_cselect_b64 [[C10:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 11
; GCN-DAG: s_cselect_b64 [[C11:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 12
; GCN-DAG: s_cselect_b64 [[C12:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 13
; GCN-DAG: s_cselect_b64 [[C13:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 14
; GCN-DAG: s_cselect_b64 [[C14:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 15
; GCN-DAG: s_cselect_b64 [[C15:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V8:v[0-9]+]], {{[^,]+}}, [[V7]], [[C8]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V9:v[0-9]+]], {{[^,]+}}, [[V8]], [[C9]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V10:v[0-9]+]], {{[^,]+}}, [[V9]], [[C10]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V11:v[0-9]+]], {{[^,]+}}, [[V10]], [[C11]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V12:v[0-9]+]], {{[^,]+}}, [[V11]], [[C12]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V13:v[0-9]+]], {{[^,]+}}, [[V12]], [[C13]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V14:v[0-9]+]], {{[^,]+}}, [[V13]], [[C14]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V15:v[0-9]+]], {{[^,]+}}, [[V14]], [[C15]]
; GCN: store_byte v[{{[0-9:]+}}], [[V15]]
define amdgpu_kernel void @byte16_extelt(i8 addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, i32 %sel
  store i8 %ext, i8 addrspace(1)* %out
  ret void
}

; <4 x i1>: the only test here that expects buffer_ instructions - four byte
; stores of the elements, one byte load, then a mask with 1.
; GCN-LABEL: {{^}}bit4_extelt:
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN-DAG: buffer_store_byte [[ZERO]],
; GCN-DAG: buffer_store_byte [[ONE]],
; GCN-DAG: buffer_store_byte [[ZERO]],
; GCN-DAG: buffer_store_byte [[ONE]],
; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[LOAD]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @bit4_extelt(i32 addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <4 x i1> <i1 0, i1 1, i1 0, i1 1>, i32 %sel
  %zext = zext i1 %ext to i32
  store i32 %zext, i32 addrspace(1)* %out
  ret void
}

; <128 x i1>: expects a select chain whose final step compares the index
; against 0x7f, followed by a mask with 1. The constant alternates 1, 0 and is
; wrapped at 16 elements per line (8 lines x 16 = 128).
; GCN-LABEL: {{^}}bit128_extelt:
; GCN-NOT: buffer_
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1
; GCN: s_cmpk_lg_i32 {{s[0-9]+}}, 0x7f
; GCN: s_cselect_b64 [[CL:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, [[V1]], [[CL]]
; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[VL]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) {
entry:
  %ext = extractelement <128 x i1> <
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
      i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, i32 %sel
  %zext = zext i1 %ext to i32
  store i32 %zext, i32 addrspace(1)* %out
  ret void
}

; <32 x float>, non-kernel function: the checks expect the index in v0, so the
; compares are vector compares (v_cmp) feeding v_cndmask; the result is
; returned in v0.
; GCN-LABEL: {{^}}float32_extelt_vec:
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 1.0, 2.0, [[CC1]]
; GCN-DAG: v_mov_b32_e32 [[LASTVAL:v[0-9]+]], 0x42000000
; GCN-DAG: v_cmp_ne_u32_e32 [[LASTCC:[^,]+]], 31, v0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v0, [[LASTVAL]], v{{[0-9]+}}, [[LASTCC]]
define float @float32_extelt_vec(i32 %sel) {
entry:
  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
  ret float %ext
}

; <16 x double>, non-kernel function: only the first step of the chain is
; checked - one vector compare of v0 against 1 selecting both dwords between
; the first two elements.
; GCN-LABEL: {{^}}double16_extelt_vec:
; GCN-NOT: buffer_
; GCN-DAG: v_mov_b32_e32 [[V1HI:v[0-9]+]], 0x3ff19999
; GCN-DAG: v_mov_b32_e32 [[V1LO:v[0-9]+]], 0x9999999a
; GCN-DAG: v_mov_b32_e32 [[V2HI:v[0-9]+]], 0x4000cccc
; GCN-DAG: v_mov_b32_e32 [[V2LO:v[0-9]+]], 0xcccccccd
; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1HI:v[0-9]+]], [[V1HI]], [[V2HI]], [[CC1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1LO:v[0-9]+]], [[V1LO]], [[V2LO]], [[CC1]]
define double @double16_extelt_vec(i32 %sel) {
entry:
  %ext = extractelement <16 x double> <double 1.1, double 2.1, double 3.1, double 4.1, double 5.1, double 6.1, double 7.1, double 8.1, double 9.1, double 10.1, double 11.1, double 12.1, double 13.1, double 14.1, double 15.1, double 16.1>, i32 %sel
  ret double %ext
}