; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s

; Codegen tests for insertelement with a non-constant index (%sel) on amdgcn
; (fiji). Every kernel inserts the constant one into a vector and stores the
; result; the check lines pin the instruction pattern expected for each vector
; type. FileCheck runs with -enable-var-scope, so every capture used inside a
; function must also be defined inside that function.

; <4 x float>: one compare + v_cndmask per element, no movrel and no buffer
; access; the stored register range runs from the element-0 select result to
; the element-3 select result.
; GCN-LABEL: {{^}}float4_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 [[CC1:[^,]+]], [[IDX:s[0-9]+]], 3
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC2:[^,]+]], [[IDX]], 2
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC2]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC3:[^,]+]], [[IDX]], 1
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC3]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC4:[^,]+]], [[IDX]], 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC4]]
; GCN: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST]]:[[ELT_LAST]]]
define amdgpu_kernel void @float4_inselt(<4 x float> addrspace(1)* %out, <4 x float> %vec, i32 %sel) {
entry:
  %v = insertelement <4 x float> %vec, float 1.000000e+00, i32 %sel
  store <4 x float> %v, <4 x float> addrspace(1)* %out
  ret void
}

; Insert into undef: no compare/select expected, just 1.0 materialized once
; and copied into three more registers.
; GCN-LABEL: {{^}}float4_inselt_undef:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-NOT: v_cmp_
; GCN-NOT: v_cndmask_
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]]
; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]]
; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]]
define amdgpu_kernel void @float4_inselt_undef(<4 x float> addrspace(1)* %out, i32 %sel) {
entry:
  %v = insertelement <4 x float> undef, float 1.000000e+00, i32 %sel
  store <4 x float> %v, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x i32>: same compare/select shape as float4_inselt, with integer 1 as the
; inserted value.
; GCN-LABEL: {{^}}int4_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 [[CC1:[^,]+]], [[IDX:s[0-9]+]], 3
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST:[0-9]+]], 1, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC2:[^,]+]], [[IDX]], 2
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}, [[CC2]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC3:[^,]+]], [[IDX]], 1
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}, [[CC3]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC4:[^,]+]], [[IDX]], 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST:[0-9]+]], 1, v{{[0-9]+}}, [[CC4]]
; GCN: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST]]:[[ELT_LAST]]]
define amdgpu_kernel void @int4_inselt(<4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %sel) {
entry:
  %v = insertelement <4 x i32> %vec, i32 1, i32 %sel
  store <4 x i32> %v, <4 x i32> addrspace(1)* %out
  ret void
}

; <2 x float>: two compare/select pairs feeding a dwordx2 store.
; GCN-LABEL: {{^}}float2_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 [[CC1:[^,]+]], [[IDX:s[0-9]+]], 1
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC2:[^,]+]], [[IDX]], 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC2]]
; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST]]:[[ELT_LAST]]]
define amdgpu_kernel void @float2_inselt(<2 x float> addrspace(1)* %out, <2 x float> %vec, i32 %sel) {
entry:
  %v = insertelement <2 x float> %vec, float 1.000000e+00, i32 %sel
  store <2 x float> %v, <2 x float> addrspace(1)* %out
  ret void
}

; <8 x float>: eight compare/select pairs, stored as two dwordx4 halves
; (elements 0-3 and 4-7).
; NOTE(review): IDX is captured again on the compare against 7 instead of
; reusing the earlier capture; confirm whether that looser match is intended.
; GCN-LABEL: {{^}}float8_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 [[CC1:[^,]+]], [[IDX:s[0-9]+]], 3
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST0:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC2:[^,]+]], [[IDX]], 2
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC2]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC3:[^,]+]], [[IDX]], 1
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC3]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC4:[^,]+]], [[IDX]], 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST0:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC4]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC5:[^,]+]], [[IDX:s[0-9]+]], 7
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST1:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC5]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC6:[^,]+]], [[IDX]], 6
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC6]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC7:[^,]+]], [[IDX]], 5
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC7]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC8:[^,]+]], [[IDX]], 4
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST1:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC8]]
; GCN-DAG: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST0]]:[[ELT_LAST0]]]
; GCN-DAG: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST1]]:[[ELT_LAST1]]]
define amdgpu_kernel void @float8_inselt(<8 x float> addrspace(1)* %out, <8 x float> %vec, i32 %sel) {
entry:
  %v = insertelement <8 x float> %vec, float 1.000000e+00, i32 %sel
  store <8 x float> %v, <8 x float> addrspace(1)* %out
  ret void
}

; <16 x float>: v_movreld_b32 is expected here.
; GCN-LABEL: {{^}}float16_inselt:
; GCN: v_movreld_b32
define amdgpu_kernel void @float16_inselt(<16 x float> addrspace(1)* %out, <16 x float> %vec, i32 %sel) {
entry:
  %v = insertelement <16 x float> %vec, float 1.000000e+00, i32 %sel
  store <16 x float> %v, <16 x float> addrspace(1)* %out
  ret void
}

; <32 x float>: v_movreld_b32 is expected here as well.
; GCN-LABEL: {{^}}float32_inselt:
; GCN: v_movreld_b32
define amdgpu_kernel void @float32_inselt(<32 x float> addrspace(1)* %out, <32 x float> %vec, i32 %sel) {
entry:
  %v = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel
  store <32 x float> %v, <32 x float> addrspace(1)* %out
  ret void
}

; <4 x half>: index is shifted left by 4, used as a 64-bit shift amount, and
; the splatted constant 0x3c003c00 is merged in with two v_bfi_b32; no
; v_cndmask or movrel.
; GCN-LABEL: {{^}}half4_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], [[SEL]]
; GCN: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], [[K]]
; GCN: v_bfi_b32 v{{[0-9]+}}, s{{[0-9]+}}, [[V]], v{{[0-9]+}}
; GCN: v_bfi_b32 v{{[0-9]+}}, s{{[0-9]+}}, [[V]], v{{[0-9]+}}
define amdgpu_kernel void @half4_inselt(<4 x half> addrspace(1)* %out, <4 x half> %vec, i32 %sel) {
entry:
  %v = insertelement <4 x half> %vec, half 1.000000e+00, i32 %sel
  store <4 x half> %v, <4 x half> addrspace(1)* %out
  ret void
}

; <2 x half>: a 0xffff mask shifted by (index << 4) drives a single v_bfi_b32.
; GCN-LABEL: {{^}}half2_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b32 [[V:s[0-9]+]], 0xffff, [[SEL]]
; GCN: v_bfi_b32 v{{[0-9]+}}, [[V]], v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @half2_inselt(<2 x half> addrspace(1)* %out, <2 x half> %vec, i32 %sel) {
entry:
  %v = insertelement <2 x half> %vec, half 1.000000e+00, i32 %sel
  store <2 x half> %v, <2 x half> addrspace(1)* %out
  ret void
}

; <8 x half>: one compare per index 0..7, eight v_cndmask, and four
; v_or_b32_sdwa.
; GCN-LABEL: {{^}}half8_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 0
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 1
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 2
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 3
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 4
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 5
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 6
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 7
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
define amdgpu_kernel void @half8_inselt(<8 x half> addrspace(1)* %out, <8 x half> %vec, i32 %sel) {
entry:
  %v = insertelement <8 x half> %vec, half 1.000000e+00, i32 %sel
  store <8 x half> %v, <8 x half> addrspace(1)* %out
  ret void
}

; <2 x i16>: same mask-and-bfi shape as half2_inselt, with the splatted
; constant 0x10001 as the inserted bits.
; GCN-LABEL: {{^}}short2_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x10001
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b32 [[V:s[0-9]+]], 0xffff, [[SEL]]
; GCN: v_bfi_b32 v{{[0-9]+}}, [[V]], [[K]], v{{[0-9]+}}
define amdgpu_kernel void @short2_inselt(<2 x i16> addrspace(1)* %out, <2 x i16> %vec, i32 %sel) {
entry:
  %v = insertelement <2 x i16> %vec, i16 1, i32 %sel
  store <2 x i16> %v, <2 x i16> addrspace(1)* %out
  ret void
}

; <4 x i16>: same shape as half4_inselt, with 0x10001 as the splatted constant.
; GCN-LABEL: {{^}}short4_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], [[SEL]]
; GCN: s_mov_b32 [[K:s[0-9]+]], 0x10001
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], [[K]]
; GCN: v_bfi_b32 v{{[0-9]+}}, s{{[0-9]+}}, [[V]], v{{[0-9]+}}
; GCN: v_bfi_b32 v{{[0-9]+}}, s{{[0-9]+}}, [[V]], v{{[0-9]+}}
define amdgpu_kernel void @short4_inselt(<4 x i16> addrspace(1)* %out, <4 x i16> %vec, i32 %sel) {
entry:
  %v = insertelement <4 x i16> %vec, i16 1, i32 %sel
  store <4 x i16> %v, <4 x i16> addrspace(1)* %out
  ret void
}

; <8 x i8>: index is shifted left by 3 and used as a 64-bit shift amount; the
; insert is done with scalar and/andn2/or. Register numbers on both s_and_b32
; checks are matched by pattern so the test does not depend on a particular
; register assignment.
; GCN-LABEL: {{^}}byte8_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 3
; GCN: s_lshl_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], [[SEL]]
; GCN: s_mov_b32 [[K:s[0-9]+]], 0x1010101
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]]
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]]
; GCN: s_andn2_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
define amdgpu_kernel void @byte8_inselt(<8 x i8> addrspace(1)* %out, <8 x i8> %vec, i32 %sel) {
entry:
  %v = insertelement <8 x i8> %vec, i8 1, i32 %sel
  store <8 x i8> %v, <8 x i8> addrspace(1)* %out
  ret void
}

; <16 x i8>: compares against the first (0) and last (15) index are checked,
; along with sixteen v_cndmask and eight v_or_b32_sdwa.
; GCN-LABEL: {{^}}byte16_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 0
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 15
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
define amdgpu_kernel void @byte16_inselt(<16 x i8> addrspace(1)* %out, <16 x i8> %vec, i32 %sel) {
entry:
  %v = insertelement <16 x i8> %vec, i8 1, i32 %sel
  store <16 x i8> %v, <16 x i8> addrspace(1)* %out
  ret void
}

; <2 x double>: one v_cmp_eq per index, each condition consumed by two
; v_cndmask (one e32 form, one e64 form with a 0 operand).
; GCN-LABEL: {{^}}double2_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_eq_u32_e64 [[CC1:[^,]+]], [[IDX:s[0-9]+]], 1
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 0, [[CC1]]
; GCN-DAG: v_cmp_eq_u32_e64 [[CC2:[^,]+]], [[IDX]], 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC2]]
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 0, [[CC2]]
define amdgpu_kernel void @double2_inselt(<2 x double> addrspace(1)* %out, <2 x double> %vec, i32 %sel) {
entry:
  %v = insertelement <2 x double> %vec, double 1.000000e+00, i32 %sel
  store <2 x double> %v, <2 x double> addrspace(1)* %out
  ret void
}

; <8 x double>: m0 is set once, then two v_movreld_b32 write register BASE
; (value 0) and register BASE+1, with no second m0 write in between.
; GCN-LABEL: {{^}}double8_inselt:
; GCN-NOT: v_cndmask
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movreld_b32_e32 v[[#BASE:]], 0
; GCN-NOT: s_mov_b32 m0
; GCN: v_movreld_b32_e32 v[[#BASE+1]],
define amdgpu_kernel void @double8_inselt(<8 x double> addrspace(1)* %out, <8 x double> %vec, i32 %sel) {
entry:
  %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel
  store <8 x double> %v, <8 x double> addrspace(1)* %out
  ret void
}

; <7 x double>: same movreld pair as double8_inselt. BASE is captured in this
; function (not read from an earlier one) because local captures are dropped
; at every label under -enable-var-scope.
; GCN-LABEL: {{^}}double7_inselt:
; GCN-NOT: v_cndmask
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movreld_b32_e32 v[[#BASE:]], 0
; GCN-NOT: s_mov_b32 m0
; GCN: v_movreld_b32_e32 v[[#BASE+1]],
define amdgpu_kernel void @double7_inselt(<7 x double> addrspace(1)* %out, <7 x double> %vec, i32 %sel) {
entry:
  %v = insertelement <7 x double> %vec, double 1.000000e+00, i32 %sel
  store <7 x double> %v, <7 x double> addrspace(1)* %out
  ret void
}

; <16 x double>: same movreld pair as double8_inselt.
; GCN-LABEL: {{^}}double16_inselt:
; GCN-NOT: v_cndmask
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movreld_b32_e32 v[[#BASE:]], 0
; GCN-NOT: s_mov_b32 m0
; GCN: v_movreld_b32_e32 v[[#BASE+1]],
define amdgpu_kernel void @double16_inselt(<16 x double> addrspace(1)* %out, <16 x double> %vec, i32 %sel) {
entry:
  %v = insertelement <16 x double> %vec, double 1.000000e+00, i32 %sel
  store <16 x double> %v, <16 x double> addrspace(1)* %out
  ret void
}

; <15 x double>: same movreld pair as double8_inselt.
; GCN-LABEL: {{^}}double15_inselt:
; GCN-NOT: v_cndmask
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movreld_b32_e32 v[[#BASE:]], 0
; GCN-NOT: s_mov_b32 m0
; GCN: v_movreld_b32_e32 v[[#BASE+1]],
define amdgpu_kernel void @double15_inselt(<15 x double> addrspace(1)* %out, <15 x double> %vec, i32 %sel) {
entry:
  %v = insertelement <15 x double> %vec, double 1.000000e+00, i32 %sel
  store <15 x double> %v, <15 x double> addrspace(1)* %out
  ret void
}

; <4 x i1>: a byte store followed by four unsigned-byte loads is expected.
; GCN-LABEL: {{^}}bit4_inselt:
; GCN: buffer_store_byte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define amdgpu_kernel void @bit4_inselt(<4 x i1> addrspace(1)* %out, <4 x i1> %vec, i32 %sel) {
entry:
  %v = insertelement <4 x i1> %vec, i1 1, i32 %sel
  store <4 x i1> %v, <4 x i1> addrspace(1)* %out
  ret void
}

; <128 x i1>: no buffer access; compare/select is checked for the first index
; (0) and for the last index (0x7f, compared via a VGPR operand).
; GCN-LABEL: {{^}}bit128_inselt:
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 [[CC1:[^,]+]], s{{[0-9]+}}, 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_mov_b32_e32 [[LASTIDX:v[0-9]+]], 0x7f
; GCN-DAG: v_cmp_ne_u32_e32 [[CCL:[^,]+]], s{{[0-9]+}}, [[LASTIDX]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}, [[CCL]]
define amdgpu_kernel void @bit128_inselt(<128 x i1> addrspace(1)* %out, <128 x i1> %vec, i32 %sel) {
entry:
  %v = insertelement <128 x i1> %vec, i1 1, i32 %sel
  store <128 x i1> %v, <128 x i1> addrspace(1)* %out
  ret void
}