; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s

; Codegen tests for insertelement with a run-time index (%sel) on amdgcn/fiji.
; Each kernel inserts the constant one into a vector and stores the result;
; the directives in front of each kernel pin the instruction pattern expected
; for that element type and vector width. Pattern variables are scoped per
; label (-enable-var-scope), so every test must define the variables it uses.

; <4 x float>: one s_cmp_lg_u32 / s_cselect_b64 / v_cndmask_b32 triple per
; element index (0..3) and a single dwordx4 store of the selected registers.
; No v_movrel and no buffer_ instruction may appear.
; GCN-LABEL: {{^}}float4_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_lg_u32 [[IDX:s[0-9]+]], 3
; GCN-DAG: s_cselect_b64 [[CC1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[CC2:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC2]]
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 1
; GCN-DAG: s_cselect_b64 [[CC3:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC3]]
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 0
; GCN-DAG: s_cselect_b64 [[CC4:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC4]]
; GCN: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST]]:[[ELT_LAST]]]
define amdgpu_kernel void @float4_inselt(<4 x float> addrspace(1)* %out, <4 x float> %vec, i32 %sel) {
entry:
  %v = insertelement <4 x float> %vec, float 1.000000e+00, i32 %sel
  store <4 x float> %v, <4 x float> addrspace(1)* %out
  ret void
}

; Insert into an undef <4 x float>: no compare or select is expected; 1.0 is
; materialized once and copied into the remaining lanes with v_mov_b32.
; GCN-LABEL: {{^}}float4_inselt_undef:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-NOT: v_cmp_
; GCN-NOT: v_cndmask_
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]]
; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]]
; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]]
define amdgpu_kernel void @float4_inselt_undef(<4 x float> addrspace(1)* %out, i32 %sel) {
entry:
  %v = insertelement <4 x float> undef, float 1.000000e+00, i32 %sel
  store <4 x float> %v, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x i32>: the select is done on the scalar unit (s_cselect_b32 per element),
; then each result is copied to a VGPR for the dwordx4 store.
; GCN-LABEL: {{^}}int4_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_lg_u32 [[IDX:s[0-9]+]], 3
; GCN-DAG: s_cselect_b32 s[[ELT_3:[0-9]+]], s{{[0-9]+}}, 1
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b32 s[[ELT_2:[0-9]+]], s{{[0-9]+}}, 1
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 1
; GCN-DAG: s_cselect_b32 s[[ELT_1:[0-9]+]], s{{[0-9]+}}, 1
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 0
; GCN-DAG: s_cselect_b32 s[[ELT_0:[0-9]+]], s{{[0-9]+}}, 1
; GCN-DAG: v_mov_b32_e32 v[[VELT_0:[0-9]+]], s[[ELT_0]]
; GCN-DAG: v_mov_b32_e32 v[[VELT_1:[0-9]+]], s[[ELT_1]]
; GCN-DAG: v_mov_b32_e32 v[[VELT_2:[0-9]+]], s[[ELT_2]]
; GCN-DAG: v_mov_b32_e32 v[[VELT_3:[0-9]+]], s[[ELT_3]]
; GCN: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[VELT_0]]:[[VELT_3]]]
define amdgpu_kernel void @int4_inselt(<4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %sel) {
entry:
  %v = insertelement <4 x i32> %vec, i32 1, i32 %sel
  store <4 x i32> %v, <4 x i32> addrspace(1)* %out
  ret void
}

; <2 x float>: same compare/select pattern as the four-element case, with two
; elements and a dwordx2 store.
; GCN-LABEL: {{^}}float2_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_lg_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[CC1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 0
; GCN-DAG: s_cselect_b64 [[CC2:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC2]]
; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST]]:[[ELT_LAST]]]
define amdgpu_kernel void @float2_inselt(<2 x float> addrspace(1)* %out, <2 x float> %vec, i32 %sel) {
entry:
  %v = insertelement <2 x float> %vec, float 1.000000e+00, i32 %sel
  store <2 x float> %v, <2 x float> addrspace(1)* %out
  ret void
}

; <8 x float>: eight compare/select triples (indices 0..7) and two dwordx4
; stores, one for elements 0..3 and one for elements 4..7.
; GCN-LABEL: {{^}}float8_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_lg_u32 [[IDX:s[0-9]+]], 3
; GCN-DAG: s_cselect_b64 [[CC1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST0:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[CC2:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC2]]
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 1
; GCN-DAG: s_cselect_b64 [[CC3:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC3]]
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 0
; GCN-DAG: s_cselect_b64 [[CC4:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST0:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC4]]
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
; GCN-DAG: s_cselect_b64 [[CC5:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST1:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC5]]
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
; GCN-DAG: s_cselect_b64 [[CC6:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC6]]
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
; GCN-DAG: s_cselect_b64 [[CC7:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC7]]
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[CC8:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST1:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC8]]
; GCN-DAG: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST0]]:[[ELT_LAST0]]]
; GCN-DAG: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST1]]:[[ELT_LAST1]]]
define amdgpu_kernel void @float8_inselt(<8 x float> addrspace(1)* %out, <8 x float> %vec, i32 %sel) {
entry:
  %v = insertelement <8 x float> %vec, float 1.000000e+00, i32 %sel
  store <8 x float> %v, <8 x float> addrspace(1)* %out
  ret void
}

; <16 x float>: at this width the expected lowering is an indexed register
; write (v_movreld_b32) rather than a compare/select chain.
; GCN-LABEL: {{^}}float16_inselt:
; GCN: v_movreld_b32
define amdgpu_kernel void @float16_inselt(<16 x float> addrspace(1)* %out, <16 x float> %vec, i32 %sel) {
entry:
  %v = insertelement <16 x float> %vec, float 1.000000e+00, i32 %sel
  store <16 x float> %v, <16 x float> addrspace(1)* %out
  ret void
}

; <32 x float>: also expected to use v_movreld_b32.
; GCN-LABEL: {{^}}float32_inselt:
; GCN: v_movreld_b32
define amdgpu_kernel void @float32_inselt(<32 x float> addrspace(1)* %out, <32 x float> %vec, i32 %sel) {
entry:
  %v = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel
  store <32 x float> %v, <32 x float> addrspace(1)* %out
  ret void
}

; <4 x half>: expected to be done with 64-bit scalar bit operations. The index
; is shifted left by 4 (scaled by the 16-bit element width) and used as a shift
; amount; 0x3c003c00 is 0x3c00 (half 1.0) replicated in both 16-bit halves.
; GCN-LABEL: {{^}}half4_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], [[SEL]]
; GCN: s_mov_b32 s[[KLO:[0-9]+]], 0x3c003c00
; GCN: s_mov_b32 s[[KHI:[0-9]+]], s[[KLO]]
; GCN: s_andn2_b64
; GCN: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s{{\[}}[[KLO]]:[[KHI]]]
; GCN: s_or_b64
define amdgpu_kernel void @half4_inselt(<4 x half> addrspace(1)* %out, <4 x half> %vec, i32 %sel) {
entry:
  %v = insertelement <4 x half> %vec, half 1.000000e+00, i32 %sel
  store <4 x half> %v, <4 x half> addrspace(1)* %out
  ret void
}

; <2 x half>: 32-bit variant of the bit-operation pattern; a 0xffff mask is
; shifted by the scaled index.
; GCN-LABEL: {{^}}half2_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b32 [[V:s[0-9]+]], 0xffff, [[SEL]]
; GCN: s_andn2_b32
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3c003c00
; GCN: s_or_b32
define amdgpu_kernel void @half2_inselt(<2 x half> addrspace(1)* %out, <2 x half> %vec, i32 %sel) {
entry:
  %v = insertelement <2 x half> %vec, half 1.000000e+00, i32 %sel
  store <2 x half> %v, <2 x half> addrspace(1)* %out
  ret void
}

; <8 x half>: one compare per index 0..7, eight v_cndmask_b32 selects, and four
; v_or_b32_sdwa instructions are expected (order is not constrained).
; GCN-LABEL: {{^}}half8_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_lg_u32 {{s[0-9]+}}, 0
; GCN-DAG: s_cmp_lg_u32 {{s[0-9]+}}, 1
; GCN-DAG: s_cmp_lg_u32 {{s[0-9]+}}, 2
; GCN-DAG: s_cmp_lg_u32 {{s[0-9]+}}, 3
; GCN-DAG: s_cmp_lg_u32 {{s[0-9]+}}, 4
; GCN-DAG: s_cmp_lg_u32 {{s[0-9]+}}, 5
; GCN-DAG: s_cmp_lg_u32 {{s[0-9]+}}, 6
; GCN-DAG: s_cmp_lg_u32 {{s[0-9]+}}, 7
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
define amdgpu_kernel void @half8_inselt(<8 x half> addrspace(1)* %out, <8 x half> %vec, i32 %sel) {
entry:
  %v = insertelement <8 x half> %vec, half 1.000000e+00, i32 %sel
  store <8 x half> %v, <8 x half> addrspace(1)* %out
  ret void
}

; <2 x i16>: same 32-bit bit-operation pattern as the <2 x half> case; 0x10001
; is the i16 value 1 replicated in both halves.
; GCN-LABEL: {{^}}short2_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b32 [[V:s[0-9]+]], 0xffff, [[SEL]]
; GCN: s_andn2_b32
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10001
; GCN: s_or_b32
define amdgpu_kernel void @short2_inselt(<2 x i16> addrspace(1)* %out, <2 x i16> %vec, i32 %sel) {
entry:
  %v = insertelement <2 x i16> %vec, i16 1, i32 %sel
  store <2 x i16> %v, <2 x i16> addrspace(1)* %out
  ret void
}

; <4 x i16>: 64-bit bit-operation pattern with the 0x10001 splat constant in
; both dwords of the mask operand.
; GCN-LABEL: {{^}}short4_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], [[SEL]]
; GCN: s_mov_b32 s[[KLO:[0-9]+]], 0x10001
; GCN: s_mov_b32 s[[KHI:[0-9]+]], s[[KLO]]
; GCN: s_andn2_b64
; GCN: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s{{\[}}[[KLO]]:[[KHI]]]
; GCN: s_or_b64
define amdgpu_kernel void @short4_inselt(<4 x i16> addrspace(1)* %out, <4 x i16> %vec, i32 %sel) {
entry:
  %v = insertelement <4 x i16> %vec, i16 1, i32 %sel
  store <4 x i16> %v, <4 x i16> addrspace(1)* %out
  ret void
}

; <8 x i8>: the index is shifted left by 3 (scaled by the 8-bit element width);
; 0x1010101 is the i8 value 1 replicated in every byte. Both 32-bit halves are
; masked with that constant. Register numbers are wildcarded on both s_and_b32
; lines because which SGPRs hold the halves is a register-allocation detail,
; not part of the pattern under test.
; GCN-LABEL: {{^}}byte8_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 3
; GCN: s_lshl_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], [[SEL]]
; GCN: s_mov_b32 [[K:s[0-9]+]], 0x1010101
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]]
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]]
; GCN: s_andn2_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
define amdgpu_kernel void @byte8_inselt(<8 x i8> addrspace(1)* %out, <8 x i8> %vec, i32 %sel) {
entry:
  %v = insertelement <8 x i8> %vec, i8 1, i32 %sel
  store <8 x i8> %v, <8 x i8> addrspace(1)* %out
  ret void
}

; <16 x i8>: compare/select lowering; the first and last index compares (0 and
; 15), sixteen v_cndmask_b32 selects and eight v_or_b32_sdwa are expected.
; GCN-LABEL: {{^}}byte16_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_lg_u32 {{s[0-9]+}}, 0
; GCN-DAG: s_cmp_lg_u32 {{s[0-9]+}}, 15
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
define amdgpu_kernel void @byte16_inselt(<16 x i8> addrspace(1)* %out, <16 x i8> %vec, i32 %sel) {
entry:
  %v = insertelement <16 x i8> %vec, i8 1, i32 %sel
  store <16 x i8> %v, <16 x i8> addrspace(1)* %out
  ret void
}

; <2 x double>: each 64-bit element is chosen with one s_cselect_b64 between the
; loaded SGPR pair and the inline constant 1.0; the four resulting dwords are
; then copied to consecutive VGPRs for a dwordx4 store.
; GCN-LABEL: {{^}}double2_inselt:
; GCN: s_load_dwordx4 s{{\[}}[[FIRST:[0-9]+]]:[[LAST:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}]
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_cmp_lg_u32 [[IDX:s[0-9]+]], 1
; GCN: s_cselect_b64 s{{\[}}[[P0_LO:[0-9]+]]:[[P0_HI:[0-9]+]]{{\]}}, s{{\[}}{{[0-9]+}}:[[LAST]]{{\]}}, 1.0
; GCN: s_cmp_lg_u32 [[IDX]], 0
; GCN: s_cselect_b64 s{{\[}}[[P1_LO:[0-9]+]]:[[P1_HI:[0-9]+]]{{\]}}, s{{\[}}[[FIRST]]:{{[0-9]+}}{{\]}}, 1.0
; GCN: v_mov_b32_e32 v[[V_FIRST:[0-9]+]], s[[P1_LO]]
; GCN: v_mov_b32_e32 v[[V_SECOND:[0-9]+]], s[[P1_HI]]
; GCN: v_mov_b32_e32 v[[V_THIRD:[0-9]+]], s[[P0_LO]]
; GCN: v_mov_b32_e32 v[[V_LAST:[0-9]+]], s[[P0_HI]]
; GCN: flat_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[V_FIRST]]:[[V_LAST]]{{\]}}
define amdgpu_kernel void @double2_inselt(<2 x double> addrspace(1)* %out, <2 x double> %vec, i32 %sel) {
entry:
  %v = insertelement <2 x double> %vec, double 1.000000e+00, i32 %sel
  store <2 x double> %v, <2 x double> addrspace(1)* %out
  ret void
}

; <5 x double>: five 64-bit scalar selects, one per element.
; GCN-LABEL: {{^}}double5_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-COUNT-5: s_cselect_b64
define amdgpu_kernel void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x double> %vec, i32 %sel) {
entry:
  %v = insertelement <5 x double> %vec, double 1.000000e+00, i32 %sel
  store <5 x double> %v, <5 x double> addrspace(1)* %out
  ret void
}

; <8 x double>: expected to use indexed register writes. m0 is set once and
; two v_movreld_b32 follow, targeting consecutive base registers (BASE and
; BASE+1), with no second write to m0 in between.
; GCN-LABEL: {{^}}double8_inselt:
; GCN-NOT: v_cndmask
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movreld_b32_e32 v[[#BASE:]], 0
; GCN-NOT: s_mov_b32 m0
; GCN: v_movreld_b32_e32 v[[#BASE+1]],
define amdgpu_kernel void @double8_inselt(<8 x double> addrspace(1)* %out, <8 x double> %vec, i32 %sel) {
entry:
  %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel
  store <8 x double> %v, <8 x double> addrspace(1)* %out
  ret void
}

; <7 x double>: same indexed-write pattern as the eight-element case. BASE is
; captured here (note the trailing colon) instead of reusing a value from an
; earlier test: numeric variables are cleared at every label when variable
; scoping is enabled, and the base register of another function is unrelated
; to this one anyway.
; GCN-LABEL: {{^}}double7_inselt:
; GCN-NOT: v_cndmask
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movreld_b32_e32 v[[#BASE:]], 0
; GCN-NOT: s_mov_b32 m0
; GCN: v_movreld_b32_e32 v[[#BASE+1]],
define amdgpu_kernel void @double7_inselt(<7 x double> addrspace(1)* %out, <7 x double> %vec, i32 %sel) {
entry:
  %v = insertelement <7 x double> %vec, double 1.000000e+00, i32 %sel
  store <7 x double> %v, <7 x double> addrspace(1)* %out
  ret void
}

; <16 x double>: indexed-write pattern, single m0 setup.
; GCN-LABEL: {{^}}double16_inselt:
; GCN-NOT: v_cndmask
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movreld_b32_e32 v[[#BASE:]], 0
; GCN-NOT: s_mov_b32 m0
; GCN: v_movreld_b32_e32 v[[#BASE+1]],
define amdgpu_kernel void @double16_inselt(<16 x double> addrspace(1)* %out, <16 x double> %vec, i32 %sel) {
entry:
  %v = insertelement <16 x double> %vec, double 1.000000e+00, i32 %sel
  store <16 x double> %v, <16 x double> addrspace(1)* %out
  ret void
}

; <15 x double>: indexed-write pattern, single m0 setup.
; GCN-LABEL: {{^}}double15_inselt:
; GCN-NOT: v_cndmask
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movreld_b32_e32 v[[#BASE:]], 0
; GCN-NOT: s_mov_b32 m0
; GCN: v_movreld_b32_e32 v[[#BASE+1]],
define amdgpu_kernel void @double15_inselt(<15 x double> addrspace(1)* %out, <15 x double> %vec, i32 %sel) {
entry:
  %v = insertelement <15 x double> %vec, double 1.000000e+00, i32 %sel
  store <15 x double> %v, <15 x double> addrspace(1)* %out
  ret void
}

; <4 x i1>: unlike the other kernels, buffer_ instructions are expected here:
; a byte store followed by four byte loads.
; NOTE(review): presumably the vector is spilled to memory and indexed there;
; the directives only establish the store/load sequence.
; GCN-LABEL: {{^}}bit4_inselt:
; GCN: buffer_store_byte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define amdgpu_kernel void @bit4_inselt(<4 x i1> addrspace(1)* %out, <4 x i1> %vec, i32 %sel) {
entry:
  %v = insertelement <4 x i1> %vec, i1 1, i32 %sel
  store <4 x i1> %v, <4 x i1> addrspace(1)* %out
  ret void
}

; <128 x i1>: compare/select lowering with no buffer_ instructions; only the
; first (index 0) and last (index 0x7f) compare/select triples are pinned.
; GCN-LABEL: {{^}}bit128_inselt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN-DAG: s_cselect_b64 [[CC1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}, [[CC1]]

; GCN-DAG: s_cmpk_lg_i32 {{s[0-9]+}}, 0x7f
; GCN-DAG: s_cselect_b64 [[CCL:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}, [[CCL]]
define amdgpu_kernel void @bit128_inselt(<128 x i1> addrspace(1)* %out, <128 x i1> %vec, i32 %sel) {
entry:
  %v = insertelement <128 x i1> %vec, i1 1, i32 %sel
  store <128 x i1> %v, <128 x i1> addrspace(1)* %out
  ret void
}

; Pixel-shader variant: <32 x float> vector and index are function arguments
; and the result is returned instead of stored. Thirty-two v_cmp_ne_u32
; compares followed by thirty-two v_cndmask_b32 selects of 1.0 are expected,
; and no buffer_ instruction.
; GCN-LABEL: {{^}}float32_inselt_vec:
; GCN-NOT: buffer_
; GCN-COUNT-32: v_cmp_ne_u32
; GCN-COUNT-32: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0,
define amdgpu_ps <32 x float> @float32_inselt_vec(<32 x float> %vec, i32 %sel) {
entry:
  %inserted = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel
  ret <32 x float> %inserted
}

; Plain (non-kernel) function returning <8 x double>: for each of the eight
; elements one v_cmp_eq_u32 followed by two v_cndmask_b32 (one per 32-bit half
; of the double) is expected, and no buffer_ instruction.
; GCN-LABEL: {{^}}double8_inselt_vec:
; GCN-NOT: buffer_
; GCN: v_cmp_eq_u32
; GCN-COUNT-2: v_cndmask_b32
; GCN: v_cmp_eq_u32
; GCN-COUNT-2: v_cndmask_b32
; GCN: v_cmp_eq_u32
; GCN-COUNT-2: v_cndmask_b32
; GCN: v_cmp_eq_u32
; GCN-COUNT-2: v_cndmask_b32
; GCN: v_cmp_eq_u32
; GCN-COUNT-2: v_cndmask_b32
; GCN: v_cmp_eq_u32
; GCN-COUNT-2: v_cndmask_b32
; GCN: v_cmp_eq_u32
; GCN-COUNT-2: v_cndmask_b32
; GCN: v_cmp_eq_u32
; GCN-COUNT-2: v_cndmask_b32
define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) {
entry:
  %inserted = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel
  ret <8 x double> %inserted
}