; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s

; Code generation test for insertelement with a run-time index (%sel).
; Every kernel inserts the constant 1 (or 1.0) into a vector kernel argument
; at position %sel and stores the resulting vector to %out. The kernels differ
; only in element type and element count.
;
; The assertion lines inside each function are generated by the script named
; on the first line; regenerate them with that script instead of editing them
; by hand.

; <4 x float>: the expected code compares %sel against each lane number (3..0)
; and uses v_cndmask_b32 to choose between the original lane and 1.0.
define amdgpu_kernel void @float4_inselt(<4 x float> addrspace(1)* %out, <4 x float> %vec, i32 %sel) {
; GCN-LABEL: float4_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s2, s[0:1], 0x44
; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_lg_u32 s2, 3
; GCN-NEXT:    v_mov_b32_e32 v0, s7
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s2, 2
; GCN-NEXT:    v_cndmask_b32_e32 v3, 1.0, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, s6
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s2, 1
; GCN-NEXT:    v_cndmask_b32_e32 v2, 1.0, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, s5
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s2, 0
; GCN-NEXT:    v_cndmask_b32_e32 v1, 1.0, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_mov_b32_e32 v5, s1
; GCN-NEXT:    v_cndmask_b32_e32 v0, 1.0, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <4 x float> %vec, float 1.000000e+00, i32 %sel
  store <4 x float> %v, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x float> with an undef base vector: the expected code never loads %sel
; and simply stores 1.0 in all four lanes.
define amdgpu_kernel void @float4_inselt_undef(<4 x float> addrspace(1)* %out, i32 %sel) {
; GCN-LABEL: float4_inselt_undef:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v0, 1.0
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    v_mov_b32_e32 v2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v5, s1
; GCN-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <4 x float> undef, float 1.000000e+00, i32 %sel
  store <4 x float> %v, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x i32>: the per-lane selection is expected to stay in scalar registers
; (s_cmp_lg_u32 followed by s_cselect_b32 for each lane).
define amdgpu_kernel void @int4_inselt(<4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %sel) {
; GCN-LABEL: int4_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s2, s[0:1], 0x44
; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_lg_u32 s2, 3
; GCN-NEXT:    s_cselect_b32 s3, s7, 1
; GCN-NEXT:    s_cmp_lg_u32 s2, 2
; GCN-NEXT:    s_cselect_b32 s6, s6, 1
; GCN-NEXT:    s_cmp_lg_u32 s2, 1
; GCN-NEXT:    s_cselect_b32 s5, s5, 1
; GCN-NEXT:    s_cmp_lg_u32 s2, 0
; GCN-NEXT:    s_cselect_b32 s2, s4, 1
; GCN-NEXT:    v_mov_b32_e32 v5, s1
; GCN-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    v_mov_b32_e32 v2, s6
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <4 x i32> %vec, i32 1, i32 %sel
  store <4 x i32> %v, <4 x i32> addrspace(1)* %out
  ret void
}

; <2 x float>: two compare/select pairs, one per lane.
define amdgpu_kernel void @float2_inselt(<2 x float> addrspace(1)* %out, <2 x float> %vec, i32 %sel) {
; GCN-LABEL: float2_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s4, s[0:1], 0x34
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_lg_u32 s4, 1
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s4, 0
; GCN-NEXT:    v_cndmask_b32_e32 v1, 1.0, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_mov_b32_e32 v3, s1
; GCN-NEXT:    v_cndmask_b32_e32 v0, 1.0, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v2, s0
; GCN-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <2 x float> %vec, float 1.000000e+00, i32 %sel
  store <2 x float> %v, <2 x float> addrspace(1)* %out
  ret void
}

; <8 x float>: the expected code copies the vector into v0..v7, moves %sel
; into m0 and writes 1.0 with a single v_movreld_b32.
define amdgpu_kernel void @float8_inselt(<8 x float> addrspace(1)* %out, <8 x float> %vec, i32 %sel) {
; GCN-LABEL: float8_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s1, s[0:1], 0x64
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    s_add_u32 s0, s2, 16
; GCN-NEXT:    s_mov_b32 m0, s1
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    v_mov_b32_e32 v2, s6
; GCN-NEXT:    v_mov_b32_e32 v3, s7
; GCN-NEXT:    v_mov_b32_e32 v4, s8
; GCN-NEXT:    v_mov_b32_e32 v5, s9
; GCN-NEXT:    v_mov_b32_e32 v6, s10
; GCN-NEXT:    v_mov_b32_e32 v7, s11
; GCN-NEXT:    v_mov_b32_e32 v9, s1
; GCN-NEXT:    v_movreld_b32_e32 v0, 1.0
; GCN-NEXT:    v_mov_b32_e32 v8, s0
; GCN-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v5, s3
; GCN-NEXT:    v_mov_b32_e32 v4, s2
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <8 x float> %vec, float 1.000000e+00, i32 %sel
  store <8 x float> %v, <8 x float> addrspace(1)* %out
  ret void
}

; <16 x float>: same m0 + v_movreld_b32 pattern over v0..v15, stored as four
; dwordx4 writes.
define amdgpu_kernel void @float16_inselt(<16 x float> addrspace(1)* %out, <16 x float> %vec, i32 %sel) {
; GCN-LABEL: float16_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x64
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s20, s[0:1], 0xa4
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    s_add_u32 s0, s2, 48
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    v_mov_b32_e32 v17, s1
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    v_mov_b32_e32 v2, s6
; GCN-NEXT:    v_mov_b32_e32 v3, s7
; GCN-NEXT:    v_mov_b32_e32 v4, s8
; GCN-NEXT:    v_mov_b32_e32 v5, s9
; GCN-NEXT:    v_mov_b32_e32 v6, s10
; GCN-NEXT:    v_mov_b32_e32 v7, s11
; GCN-NEXT:    v_mov_b32_e32 v8, s12
; GCN-NEXT:    v_mov_b32_e32 v9, s13
; GCN-NEXT:    v_mov_b32_e32 v10, s14
; GCN-NEXT:    v_mov_b32_e32 v11, s15
; GCN-NEXT:    v_mov_b32_e32 v12, s16
; GCN-NEXT:    v_mov_b32_e32 v13, s17
; GCN-NEXT:    v_mov_b32_e32 v14, s18
; GCN-NEXT:    v_mov_b32_e32 v15, s19
; GCN-NEXT:    s_mov_b32 m0, s20
; GCN-NEXT:    v_mov_b32_e32 v16, s0
; GCN-NEXT:    s_add_u32 s0, s2, 32
; GCN-NEXT:    v_movreld_b32_e32 v0, 1.0
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    flat_store_dwordx4 v[16:17], v[12:15]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v13, s1
; GCN-NEXT:    v_mov_b32_e32 v12, s0
; GCN-NEXT:    s_add_u32 s0, s2, 16
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v9, s1
; GCN-NEXT:    v_mov_b32_e32 v8, s0
; GCN-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v5, s3
; GCN-NEXT:    v_mov_b32_e32 v4, s2
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <16 x float> %vec, float 1.000000e+00, i32 %sel
  store <16 x float> %v, <16 x float> addrspace(1)* %out
  ret void
}

; <32 x float>: same m0 + v_movreld_b32 pattern over v0..v31, stored as eight
; dwordx4 writes.
define amdgpu_kernel void @float32_inselt(<32 x float> addrspace(1)* %out, <32 x float> %vec, i32 %sel) {
; GCN-LABEL: float32_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dwordx16 s[36:51], s[0:1], 0xa4
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0xe4
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x124
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, s36
; GCN-NEXT:    v_mov_b32_e32 v1, s37
; GCN-NEXT:    v_mov_b32_e32 v2, s38
; GCN-NEXT:    s_mov_b32 m0, s0
; GCN-NEXT:    s_add_u32 s0, s2, 0x70
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    v_mov_b32_e32 v33, s1
; GCN-NEXT:    v_mov_b32_e32 v3, s39
; GCN-NEXT:    v_mov_b32_e32 v4, s40
; GCN-NEXT:    v_mov_b32_e32 v5, s41
; GCN-NEXT:    v_mov_b32_e32 v6, s42
; GCN-NEXT:    v_mov_b32_e32 v7, s43
; GCN-NEXT:    v_mov_b32_e32 v8, s44
; GCN-NEXT:    v_mov_b32_e32 v9, s45
; GCN-NEXT:    v_mov_b32_e32 v10, s46
; GCN-NEXT:    v_mov_b32_e32 v11, s47
; GCN-NEXT:    v_mov_b32_e32 v12, s48
; GCN-NEXT:    v_mov_b32_e32 v13, s49
; GCN-NEXT:    v_mov_b32_e32 v14, s50
; GCN-NEXT:    v_mov_b32_e32 v15, s51
; GCN-NEXT:    v_mov_b32_e32 v16, s4
; GCN-NEXT:    v_mov_b32_e32 v17, s5
; GCN-NEXT:    v_mov_b32_e32 v18, s6
; GCN-NEXT:    v_mov_b32_e32 v19, s7
; GCN-NEXT:    v_mov_b32_e32 v20, s8
; GCN-NEXT:    v_mov_b32_e32 v21, s9
; GCN-NEXT:    v_mov_b32_e32 v22, s10
; GCN-NEXT:    v_mov_b32_e32 v23, s11
; GCN-NEXT:    v_mov_b32_e32 v24, s12
; GCN-NEXT:    v_mov_b32_e32 v25, s13
; GCN-NEXT:    v_mov_b32_e32 v26, s14
; GCN-NEXT:    v_mov_b32_e32 v27, s15
; GCN-NEXT:    v_mov_b32_e32 v28, s16
; GCN-NEXT:    v_mov_b32_e32 v29, s17
; GCN-NEXT:    v_mov_b32_e32 v30, s18
; GCN-NEXT:    v_mov_b32_e32 v31, s19
; GCN-NEXT:    v_mov_b32_e32 v32, s0
; GCN-NEXT:    s_add_u32 s0, s2, 0x60
; GCN-NEXT:    v_movreld_b32_e32 v0, 1.0
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    flat_store_dwordx4 v[32:33], v[28:31]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v29, s1
; GCN-NEXT:    v_mov_b32_e32 v28, s0
; GCN-NEXT:    s_add_u32 s0, s2, 0x50
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    flat_store_dwordx4 v[28:29], v[24:27]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v25, s1
; GCN-NEXT:    v_mov_b32_e32 v24, s0
; GCN-NEXT:    s_add_u32 s0, s2, 64
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    flat_store_dwordx4 v[24:25], v[20:23]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v21, s1
; GCN-NEXT:    v_mov_b32_e32 v20, s0
; GCN-NEXT:    s_add_u32 s0, s2, 48
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    flat_store_dwordx4 v[20:21], v[16:19]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v17, s1
; GCN-NEXT:    v_mov_b32_e32 v16, s0
; GCN-NEXT:    s_add_u32 s0, s2, 32
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    flat_store_dwordx4 v[16:17], v[12:15]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v13, s1
; GCN-NEXT:    v_mov_b32_e32 v12, s0
; GCN-NEXT:    s_add_u32 s0, s2, 16
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v9, s1
; GCN-NEXT:    v_mov_b32_e32 v8, s0
; GCN-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v5, s3
; GCN-NEXT:    v_mov_b32_e32 v4, s2
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel
  store <32 x float> %v, <32 x float> addrspace(1)* %out
  ret void
}

; <4 x half>: the vector is handled as one 64-bit value. A 0xffff mask is
; shifted left by %sel * 16 and used to merge a splat of 0x3c00 (half 1.0)
; into the original bits with andn2 / and / or.
define amdgpu_kernel void @half4_inselt(<4 x half> addrspace(1)* %out, <4 x half> %vec, i32 %sel) {
; GCN-LABEL: half4_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s6, s[0:1], 0x34
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
; GCN-NEXT:    s_mov_b64 s[4:5], 0xffff
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshl_b32 s6, s6, 4
; GCN-NEXT:    s_lshl_b64 s[4:5], s[4:5], s6
; GCN-NEXT:    s_mov_b32 s6, 0x3c003c00
; GCN-NEXT:    s_mov_b32 s7, s6
; GCN-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NEXT:    v_mov_b32_e32 v3, s1
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_mov_b32_e32 v2, s0
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <4 x half> %vec, half 1.000000e+00, i32 %sel
  store <4 x half> %v, <4 x half> addrspace(1)* %out
  ret void
}

; <2 x half>: 32-bit form of the shifted-mask merge used for <4 x half>.
define amdgpu_kernel void @half2_inselt(<2 x half> addrspace(1)* %out, <2 x half> %vec, i32 %sel) {
; GCN-LABEL: half2_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s2, s[0:1], 0x30
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshl_b32 s2, s2, 4
; GCN-NEXT:    s_lshl_b32 s2, 0xffff, s2
; GCN-NEXT:    s_andn2_b32 s3, s3, s2
; GCN-NEXT:    s_and_b32 s2, s2, 0x3c003c00
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <2 x half> %vec, half 1.000000e+00, i32 %sel
  store <2 x half> %v, <2 x half> addrspace(1)* %out
  ret void
}

; <8 x half>: one compare/select per element (indices 7..0) against 0x3c00;
; the 16-bit halves are repacked with v_lshlrev_b32 and SDWA v_or_b32.
define amdgpu_kernel void @half8_inselt(<8 x half> addrspace(1)* %out, <8 x half> %vec, i32 %sel) {
; GCN-LABEL: half8_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; GCN-NEXT:    s_load_dword s2, s[0:1], 0x44
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v0, 0x3c00
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b32 s3, s7, 16
; GCN-NEXT:    s_cmp_lg_u32 s2, 7
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s2, 6
; GCN-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
; GCN-NEXT:    v_mov_b32_e32 v2, s7
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_lshr_b32 s3, s6, 16
; GCN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GCN-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
; GCN-NEXT:    s_cmp_lg_u32 s2, 5
; GCN-NEXT:    v_or_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s2, 4
; GCN-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
; GCN-NEXT:    v_mov_b32_e32 v2, s6
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_lshr_b32 s3, s5, 16
; GCN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GCN-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
; GCN-NEXT:    s_cmp_lg_u32 s2, 3
; GCN-NEXT:    v_or_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s2, 2
; GCN-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
; GCN-NEXT:    v_mov_b32_e32 v4, s5
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_lshr_b32 s3, s4, 16
; GCN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GCN-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
; GCN-NEXT:    s_cmp_lg_u32 s2, 1
; GCN-NEXT:    v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v4, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s2, 0
; GCN-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
; GCN-NEXT:    v_mov_b32_e32 v5, s4
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v5, s1
; GCN-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <8 x half> %vec, half 1.000000e+00, i32 %sel
  store <8 x half> %v, <8 x half> addrspace(1)* %out
  ret void
}

; <2 x i16>: same 32-bit shifted-mask merge as <2 x half>, with the inserted
; value splatted as 0x10001.
define amdgpu_kernel void @short2_inselt(<2 x i16> addrspace(1)* %out, <2 x i16> %vec, i32 %sel) {
; GCN-LABEL: short2_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s2, s[0:1], 0x30
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshl_b32 s2, s2, 4
; GCN-NEXT:    s_lshl_b32 s2, 0xffff, s2
; GCN-NEXT:    s_andn2_b32 s3, s3, s2
; GCN-NEXT:    s_and_b32 s2, s2, 0x10001
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <2 x i16> %vec, i16 1, i32 %sel
  store <2 x i16> %v, <2 x i16> addrspace(1)* %out
  ret void
}

; <4 x i16>: same 64-bit shifted-mask merge as <4 x half>, with the inserted
; value splatted as 0x10001.
define amdgpu_kernel void @short4_inselt(<4 x i16> addrspace(1)* %out, <4 x i16> %vec, i32 %sel) {
; GCN-LABEL: short4_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s6, s[0:1], 0x34
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
; GCN-NEXT:    s_mov_b64 s[4:5], 0xffff
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshl_b32 s6, s6, 4
; GCN-NEXT:    s_lshl_b64 s[4:5], s[4:5], s6
; GCN-NEXT:    s_mov_b32 s6, 0x10001
; GCN-NEXT:    s_mov_b32 s7, s6
; GCN-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NEXT:    v_mov_b32_e32 v3, s1
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_mov_b32_e32 v2, s0
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <4 x i16> %vec, i16 1, i32 %sel
  store <4 x i16> %v, <4 x i16> addrspace(1)* %out
  ret void
}

; <8 x i8>: 64-bit shifted-mask merge with %sel scaled by 8 and the inserted
; value splatted as 0x1010101.
; NOTE(review): the recorded mask constant is 0xffff, the same value used by
; the 16-bit element kernels, although the element here is 8 bits wide. The
; assertions only capture what llc emitted; confirm whether this is intended.
define amdgpu_kernel void @byte8_inselt(<8 x i8> addrspace(1)* %out, <8 x i8> %vec, i32 %sel) {
; GCN-LABEL: byte8_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s6, s[0:1], 0x34
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
; GCN-NEXT:    s_mov_b64 s[4:5], 0xffff
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshl_b32 s6, s6, 3
; GCN-NEXT:    s_lshl_b64 s[4:5], s[4:5], s6
; GCN-NEXT:    s_and_b32 s7, s5, 0x1010101
; GCN-NEXT:    s_and_b32 s6, s4, 0x1010101
; GCN-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[6:7], s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <8 x i8> %vec, i8 1, i32 %sel
  store <8 x i8> %v, <8 x i8> addrspace(1)* %out
  ret void
}

; <16 x i8>: one compare/select per byte (indices 15..0); bytes are repacked
; into dwords with v_lshlrev_b16 and SDWA v_or_b32.
define amdgpu_kernel void @byte16_inselt(<16 x i8> addrspace(1)* %out, <16 x i8> %vec, i32 %sel) {
; GCN-LABEL: byte16_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; GCN-NEXT:    s_load_dword s2, s[0:1], 0x44
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b32 s3, s7, 24
; GCN-NEXT:    s_cmp_lg_u32 s2, 15
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_lshr_b32 s3, s7, 16
; GCN-NEXT:    s_cmp_lg_u32 s2, 14
; GCN-NEXT:    v_cndmask_b32_e32 v0, 1, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_lshr_b32 s3, s7, 8
; GCN-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
; GCN-NEXT:    v_cndmask_b32_e32 v1, 1, v1, vcc
; GCN-NEXT:    s_cmp_lg_u32 s2, 13
; GCN-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s2, 12
; GCN-NEXT:    v_cndmask_b32_e32 v1, 1, v1, vcc
; GCN-NEXT:    v_mov_b32_e32 v2, s7
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GCN-NEXT:    v_cndmask_b32_e32 v2, 1, v2, vcc
; GCN-NEXT:    s_lshr_b32 s3, s6, 24
; GCN-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT:    s_cmp_lg_u32 s2, 11
; GCN-NEXT:    v_or_b32_sdwa v3, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_lshr_b32 s3, s6, 16
; GCN-NEXT:    s_cmp_lg_u32 s2, 10
; GCN-NEXT:    v_cndmask_b32_e32 v0, 1, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_lshr_b32 s3, s6, 8
; GCN-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
; GCN-NEXT:    v_cndmask_b32_e32 v1, 1, v1, vcc
; GCN-NEXT:    s_cmp_lg_u32 s2, 9
; GCN-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s2, 8
; GCN-NEXT:    v_cndmask_b32_e32 v1, 1, v1, vcc
; GCN-NEXT:    v_mov_b32_e32 v2, s6
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GCN-NEXT:    v_cndmask_b32_e32 v2, 1, v2, vcc
; GCN-NEXT:    s_lshr_b32 s3, s5, 24
; GCN-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT:    s_cmp_lg_u32 s2, 7
; GCN-NEXT:    v_or_b32_sdwa v2, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_lshr_b32 s3, s5, 16
; GCN-NEXT:    s_cmp_lg_u32 s2, 6
; GCN-NEXT:    v_cndmask_b32_e32 v0, 1, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_lshr_b32 s3, s5, 8
; GCN-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
; GCN-NEXT:    v_cndmask_b32_e32 v1, 1, v1, vcc
; GCN-NEXT:    s_cmp_lg_u32 s2, 5
; GCN-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s2, 4
; GCN-NEXT:    v_cndmask_b32_e32 v1, 1, v1, vcc
; GCN-NEXT:    v_mov_b32_e32 v4, s5
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GCN-NEXT:    v_cndmask_b32_e32 v4, 1, v4, vcc
; GCN-NEXT:    s_lshr_b32 s3, s4, 24
; GCN-NEXT:    v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT:    s_cmp_lg_u32 s2, 3
; GCN-NEXT:    v_or_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_lshr_b32 s3, s4, 16
; GCN-NEXT:    s_cmp_lg_u32 s2, 2
; GCN-NEXT:    v_cndmask_b32_e32 v0, 1, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v4, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_lshr_b32 s3, s4, 8
; GCN-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
; GCN-NEXT:    v_cndmask_b32_e32 v4, 1, v4, vcc
; GCN-NEXT:    s_cmp_lg_u32 s2, 1
; GCN-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v4, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_cmp_lg_u32 s2, 0
; GCN-NEXT:    v_cndmask_b32_e32 v4, 1, v4, vcc
; GCN-NEXT:    v_mov_b32_e32 v5, s4
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_lshlrev_b16_e32 v4, 8, v4
; GCN-NEXT:    v_cndmask_b32_e32 v5, 1, v5, vcc
; GCN-NEXT:    v_or_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN-NEXT:    v_mov_b32_e32 v5, s1
; GCN-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <16 x i8> %vec, i8 1, i32 %sel
  store <16 x i8> %v, <16 x i8> addrspace(1)* %out
  ret void
}

; <2 x double>: each element is handled as two dwords. An s_cmp_eq_u32 on
; %sel selects 0x3ff00000 for the high dword and 0 for the low dword.
define amdgpu_kernel void @double2_inselt(<2 x double> addrspace(1)* %out, <2 x double> %vec, i32 %sel) {
; GCN-LABEL: double2_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s2, s[0:1], 0x44
; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v0, 0x3ff00000
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_eq_u32 s2, 1
; GCN-NEXT:    v_mov_b32_e32 v1, s7
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_cndmask_b32_e32 v3, v1, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v1, s6
; GCN-NEXT:    s_cmp_eq_u32 s2, 0
; GCN-NEXT:    v_cndmask_b32_e64 v2, v1, 0, vcc
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    v_mov_b32_e32 v5, s1
; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <2 x double> %vec, double 1.000000e+00, i32 %sel
  store <2 x double> %v, <2 x double> addrspace(1)* %out
  ret void
}

; <5 x double>: same per-element compare/select on dword pairs as
; <2 x double>, for five elements; stored as two dwordx4 and one dwordx2.
define amdgpu_kernel void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x double> %vec, i32 %sel) {
; GCN-LABEL: double5_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s12, s[0:1], 0xa4
; GCN-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x84
; GCN-NEXT:    s_load_dwordx2 s[10:11], s[0:1], 0x24
; GCN-NEXT:    s_load_dwordx8 s[0:7], s[0:1], 0x64
; GCN-NEXT:    v_mov_b32_e32 v4, 0x3ff00000
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_eq_u32 s12, 4
; GCN-NEXT:    v_mov_b32_e32 v0, s9
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_cndmask_b32_e32 v9, v0, v4, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, s8
; GCN-NEXT:    s_cmp_eq_u32 s12, 1
; GCN-NEXT:    v_cndmask_b32_e64 v8, v0, 0, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_cndmask_b32_e32 v3, v0, v4, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NEXT:    s_cmp_eq_u32 s12, 0
; GCN-NEXT:    v_cndmask_b32_e64 v2, v0, 0, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, s1
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_cndmask_b32_e32 v1, v0, v4, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    s_cmp_eq_u32 s12, 3
; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT:    v_mov_b32_e32 v5, s7
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    v_cndmask_b32_e32 v7, v5, v4, vcc
; GCN-NEXT:    v_mov_b32_e32 v5, s6
; GCN-NEXT:    s_cmp_eq_u32 s12, 2
; GCN-NEXT:    v_cndmask_b32_e64 v6, v5, 0, vcc
; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
; GCN-NEXT:    s_add_u32 s0, s10, 16
; GCN-NEXT:    v_mov_b32_e32 v5, s5
; GCN-NEXT:    s_addc_u32 s1, s11, 0
; GCN-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
; GCN-NEXT:    v_mov_b32_e32 v4, s4
; GCN-NEXT:    v_mov_b32_e32 v11, s1
; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
; GCN-NEXT:    v_mov_b32_e32 v10, s0
; GCN-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
; GCN-NEXT:    s_add_u32 s0, s10, 32
; GCN-NEXT:    v_mov_b32_e32 v4, s10
; GCN-NEXT:    v_mov_b32_e32 v5, s11
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_addc_u32 s1, s11, 0
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[8:9]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <5 x double> %vec, double 1.000000e+00, i32 %sel
  store <5 x double> %v, <5 x double> addrspace(1)* %out
  ret void
}

; <8 x double>: %sel is doubled (shift left by 1) into m0, then two
; v_movreld_b32 writes store 0 to the low dword and 0x3ff00000 to the high
; dword of the selected element.
define amdgpu_kernel void @double8_inselt(<8 x double> addrspace(1)* %out, <8 x double> %vec, i32 %sel) {
; GCN-LABEL: double8_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s2, s[0:1], 0xa4
; GCN-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x64
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v16, 0x3ff00000
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshl_b32 s2, s2, 1
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    v_mov_b32_e32 v2, s6
; GCN-NEXT:    v_mov_b32_e32 v3, s7
; GCN-NEXT:    v_mov_b32_e32 v4, s8
; GCN-NEXT:    v_mov_b32_e32 v5, s9
; GCN-NEXT:    v_mov_b32_e32 v6, s10
; GCN-NEXT:    v_mov_b32_e32 v7, s11
; GCN-NEXT:    v_mov_b32_e32 v8, s12
; GCN-NEXT:    v_mov_b32_e32 v9, s13
; GCN-NEXT:    v_mov_b32_e32 v10, s14
; GCN-NEXT:    v_mov_b32_e32 v11, s15
; GCN-NEXT:    v_mov_b32_e32 v12, s16
; GCN-NEXT:    v_mov_b32_e32 v13, s17
; GCN-NEXT:    v_mov_b32_e32 v14, s18
; GCN-NEXT:    v_mov_b32_e32 v15, s19
; GCN-NEXT:    s_mov_b32 m0, s2
; GCN-NEXT:    s_add_u32 s2, s0, 48
; GCN-NEXT:    v_movreld_b32_e32 v0, 0
; GCN-NEXT:    s_addc_u32 s3, s1, 0
; GCN-NEXT:    v_movreld_b32_e32 v1, v16
; GCN-NEXT:    v_mov_b32_e32 v17, s3
; GCN-NEXT:    v_mov_b32_e32 v16, s2
; GCN-NEXT:    s_add_u32 s2, s0, 32
; GCN-NEXT:    s_addc_u32 s3, s1, 0
; GCN-NEXT:    flat_store_dwordx4 v[16:17], v[12:15]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v13, s3
; GCN-NEXT:    v_mov_b32_e32 v12, s2
; GCN-NEXT:    s_add_u32 s2, s0, 16
; GCN-NEXT:    s_addc_u32 s3, s1, 0
; GCN-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v9, s3
; GCN-NEXT:    v_mov_b32_e32 v8, s2
; GCN-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v5, s1
; GCN-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel
  store <8 x double> %v, <8 x double> addrspace(1)* %out
  ret void
}

; <7 x double>: same doubled-index m0 + v_movreld_b32 pair as <8 x double>;
; the result is stored as three dwordx4 writes and one dwordx2 write.
define amdgpu_kernel void @double7_inselt(<7 x double> addrspace(1)* %out, <7 x double> %vec, i32 %sel) {
; GCN-LABEL: double7_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x64
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x94
; GCN-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x84
; GCN-NEXT:    s_load_dword s0, s[0:1], 0xa4
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    v_mov_b32_e32 v2, s6
; GCN-NEXT:    v_mov_b32_e32 v3, s7
; GCN-NEXT:    s_lshl_b32 s0, s0, 1
; GCN-NEXT:    v_mov_b32_e32 v4, s8
; GCN-NEXT:    v_mov_b32_e32 v5, s9
; GCN-NEXT:    v_mov_b32_e32 v6, s10
; GCN-NEXT:    v_mov_b32_e32 v7, s11
; GCN-NEXT:    v_mov_b32_e32 v8, s12
; GCN-NEXT:    v_mov_b32_e32 v9, s13
; GCN-NEXT:    v_mov_b32_e32 v10, s14
; GCN-NEXT:    v_mov_b32_e32 v11, s15
; GCN-NEXT:    v_mov_b32_e32 v12, s16
; GCN-NEXT:    v_mov_b32_e32 v13, s17
; GCN-NEXT:    s_mov_b32 m0, s0
; GCN-NEXT:    v_movreld_b32_e32 v0, 0
; GCN-NEXT:    v_mov_b32_e32 v16, 0x3ff00000
; GCN-NEXT:    s_add_u32 s0, s2, 16
; GCN-NEXT:    v_movreld_b32_e32 v1, v16
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    v_mov_b32_e32 v15, s1
; GCN-NEXT:    v_mov_b32_e32 v14, s0
; GCN-NEXT:    flat_store_dwordx4 v[14:15], v[4:7]
; GCN-NEXT:    s_add_u32 s0, s2, 48
; GCN-NEXT:    v_mov_b32_e32 v5, s3
; GCN-NEXT:    v_mov_b32_e32 v4, s2
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    s_add_u32 s0, s2, 32
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[12:13]
; GCN-NEXT:    s_addc_u32 s1, s3, 0
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <7 x double> %vec, double 1.000000e+00, i32 %sel
  store <7 x double> %v, <7 x double> addrspace(1)* %out
  ret void
}

; <16 x double>: same doubled-index m0 + v_movreld_b32 pair over v0..v31,
; stored as eight dwordx4 writes.
define amdgpu_kernel void @double16_inselt(<16 x double> addrspace(1)* %out, <16 x double> %vec, i32 %sel) {
; GCN-LABEL: double16_inselt:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s2, s[0:1], 0x124
; GCN-NEXT:    s_load_dwordx16 s[36:51], s[0:1], 0xa4
; GCN-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0xe4
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v32, 0x3ff00000
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, s36
; GCN-NEXT:    s_lshl_b32 s2, s2, 1
; GCN-NEXT:    v_mov_b32_e32 v1, s37
; GCN-NEXT:    v_mov_b32_e32 v2, s38
; GCN-NEXT:    v_mov_b32_e32 v3, s39
; GCN-NEXT:    v_mov_b32_e32 v4, s40
; GCN-NEXT:    v_mov_b32_e32 v5, s41
; GCN-NEXT:    v_mov_b32_e32 v6, s42
; GCN-NEXT:    v_mov_b32_e32 v7, s43
; GCN-NEXT:    v_mov_b32_e32 v8, s44
; GCN-NEXT:    v_mov_b32_e32 v9, s45
; GCN-NEXT:    v_mov_b32_e32 v10, s46
; GCN-NEXT:    v_mov_b32_e32 v11, s47
; GCN-NEXT:    v_mov_b32_e32 v12, s48
; GCN-NEXT:    v_mov_b32_e32 v13, s49
; GCN-NEXT:    v_mov_b32_e32 v14, s50
; GCN-NEXT:    v_mov_b32_e32 v15, s51
; GCN-NEXT:    v_mov_b32_e32 v16, s4
; GCN-NEXT:    v_mov_b32_e32 v17, s5
; GCN-NEXT:    v_mov_b32_e32 v18, s6
; GCN-NEXT:    v_mov_b32_e32 v19, s7
; GCN-NEXT:    v_mov_b32_e32 v20, s8
; GCN-NEXT:    v_mov_b32_e32 v21, s9
; GCN-NEXT:    v_mov_b32_e32 v22, s10
; GCN-NEXT:    v_mov_b32_e32 v23, s11
; GCN-NEXT:    v_mov_b32_e32 v24, s12
; GCN-NEXT:    v_mov_b32_e32 v25, s13
; GCN-NEXT:    v_mov_b32_e32 v26, s14
; GCN-NEXT:    v_mov_b32_e32 v27, s15
; GCN-NEXT:    v_mov_b32_e32 v28, s16
; GCN-NEXT:    v_mov_b32_e32 v29, s17
; GCN-NEXT:    v_mov_b32_e32 v30, s18
; GCN-NEXT:    v_mov_b32_e32 v31, s19
; GCN-NEXT:    s_mov_b32 m0, s2
; GCN-NEXT:    s_add_u32 s2, s0, 0x70
; GCN-NEXT:    v_movreld_b32_e32 v0, 0
; GCN-NEXT:    s_addc_u32 s3, s1, 0
; GCN-NEXT:    v_movreld_b32_e32 v1, v32
; GCN-NEXT:    v_mov_b32_e32 v33, s3
; GCN-NEXT:    v_mov_b32_e32 v32, s2
; GCN-NEXT:    s_add_u32 s2, s0, 0x60
; GCN-NEXT:    s_addc_u32 s3, s1, 0
; GCN-NEXT:    flat_store_dwordx4 v[32:33], v[28:31]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v29, s3
; GCN-NEXT:    v_mov_b32_e32 v28, s2
; GCN-NEXT:    s_add_u32 s2, s0, 0x50
; GCN-NEXT:    s_addc_u32 s3, s1, 0
; GCN-NEXT:    flat_store_dwordx4 v[28:29], v[24:27]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v25, s3
; GCN-NEXT:    v_mov_b32_e32 v24, s2
; GCN-NEXT:    s_add_u32 s2, s0, 64
; GCN-NEXT:    s_addc_u32 s3, s1, 0
; GCN-NEXT:    flat_store_dwordx4 v[24:25], v[20:23]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v21, s3
; GCN-NEXT:    v_mov_b32_e32 v20, s2
; GCN-NEXT:    s_add_u32 s2, s0, 48
; GCN-NEXT:    s_addc_u32 s3, s1, 0
; GCN-NEXT:    flat_store_dwordx4 v[20:21], v[16:19]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v17, s3
; GCN-NEXT:    v_mov_b32_e32 v16, s2
; GCN-NEXT:    s_add_u32 s2, s0, 32
; GCN-NEXT:    s_addc_u32 s3, s1, 0
; GCN-NEXT:    flat_store_dwordx4 v[16:17], v[12:15]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v13, s3
; GCN-NEXT:    v_mov_b32_e32 v12, s2
; GCN-NEXT:    s_add_u32 s2, s0, 16
; GCN-NEXT:    s_addc_u32 s3, s1, 0
; GCN-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v9, s3
; GCN-NEXT:    v_mov_b32_e32 v8, s2
; GCN-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_mov_b32_e32 v5, s1
; GCN-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
entry:
  %v = insertelement <16 x double> %vec, double 1.000000e+00, i32 %sel
  store <16 x double> %v, <16 x double> addrspace(1)* %out
  ret void
}

886define amdgpu_kernel void @double15_inselt(<15 x double> addrspace(1)* %out, <15 x double> %vec, i32 %sel) { 887; GCN-LABEL: double15_inselt: 888; GCN: ; %bb.0: ; %entry 889; GCN-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0xa4 890; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x114 891; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x104 892; GCN-NEXT: s_load_dwordx8 s[24:31], s[0:1], 0xe4 893; GCN-NEXT: v_mov_b32_e32 v32, 0x3ff00000 894; GCN-NEXT: s_waitcnt lgkmcnt(0) 895; GCN-NEXT: v_mov_b32_e32 v0, s8 896; GCN-NEXT: v_mov_b32_e32 v28, s2 897; GCN-NEXT: v_mov_b32_e32 v24, s4 898; GCN-NEXT: s_load_dword s4, s[0:1], 0x124 899; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 900; GCN-NEXT: v_mov_b32_e32 v1, s9 901; GCN-NEXT: v_mov_b32_e32 v2, s10 902; GCN-NEXT: v_mov_b32_e32 v3, s11 903; GCN-NEXT: s_waitcnt lgkmcnt(0) 904; GCN-NEXT: s_lshl_b32 s2, s4, 1 905; GCN-NEXT:
v_mov_b32_e32 v4, s12 906; GCN-NEXT: v_mov_b32_e32 v5, s13 907; GCN-NEXT: v_mov_b32_e32 v6, s14 908; GCN-NEXT: v_mov_b32_e32 v7, s15 909; GCN-NEXT: v_mov_b32_e32 v8, s16 910; GCN-NEXT: v_mov_b32_e32 v9, s17 911; GCN-NEXT: v_mov_b32_e32 v10, s18 912; GCN-NEXT: v_mov_b32_e32 v11, s19 913; GCN-NEXT: v_mov_b32_e32 v12, s20 914; GCN-NEXT: v_mov_b32_e32 v13, s21 915; GCN-NEXT: v_mov_b32_e32 v14, s22 916; GCN-NEXT: v_mov_b32_e32 v15, s23 917; GCN-NEXT: v_mov_b32_e32 v16, s24 918; GCN-NEXT: v_mov_b32_e32 v17, s25 919; GCN-NEXT: v_mov_b32_e32 v18, s26 920; GCN-NEXT: v_mov_b32_e32 v19, s27 921; GCN-NEXT: v_mov_b32_e32 v20, s28 922; GCN-NEXT: v_mov_b32_e32 v21, s29 923; GCN-NEXT: v_mov_b32_e32 v22, s30 924; GCN-NEXT: v_mov_b32_e32 v23, s31 925; GCN-NEXT: v_mov_b32_e32 v25, s5 926; GCN-NEXT: v_mov_b32_e32 v26, s6 927; GCN-NEXT: v_mov_b32_e32 v27, s7 928; GCN-NEXT: v_mov_b32_e32 v29, s3 929; GCN-NEXT: s_mov_b32 m0, s2 930; GCN-NEXT: v_movreld_b32_e32 v0, 0 931; GCN-NEXT: s_add_u32 s2, s0, 0x50 932; GCN-NEXT: v_movreld_b32_e32 v1, v32 933; GCN-NEXT: s_addc_u32 s3, s1, 0 934; GCN-NEXT: v_mov_b32_e32 v31, s3 935; GCN-NEXT: v_mov_b32_e32 v30, s2 936; GCN-NEXT: s_add_u32 s2, s0, 64 937; GCN-NEXT: s_addc_u32 s3, s1, 0 938; GCN-NEXT: flat_store_dwordx4 v[30:31], v[20:23] 939; GCN-NEXT: s_nop 0 940; GCN-NEXT: v_mov_b32_e32 v21, s3 941; GCN-NEXT: v_mov_b32_e32 v20, s2 942; GCN-NEXT: s_add_u32 s2, s0, 48 943; GCN-NEXT: s_addc_u32 s3, s1, 0 944; GCN-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 945; GCN-NEXT: s_nop 0 946; GCN-NEXT: v_mov_b32_e32 v17, s3 947; GCN-NEXT: v_mov_b32_e32 v16, s2 948; GCN-NEXT: s_add_u32 s2, s0, 32 949; GCN-NEXT: s_addc_u32 s3, s1, 0 950; GCN-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 951; GCN-NEXT: s_nop 0 952; GCN-NEXT: v_mov_b32_e32 v13, s3 953; GCN-NEXT: v_mov_b32_e32 v12, s2 954; GCN-NEXT: s_add_u32 s2, s0, 16 955; GCN-NEXT: s_addc_u32 s3, s1, 0 956; GCN-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 957; GCN-NEXT: s_nop 0 958; GCN-NEXT: v_mov_b32_e32 v9, s3 
959; GCN-NEXT: v_mov_b32_e32 v8, s2 960; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 961; GCN-NEXT: s_add_u32 s2, s0, 0x70 962; GCN-NEXT: v_mov_b32_e32 v5, s1 963; GCN-NEXT: v_mov_b32_e32 v4, s0 964; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 965; GCN-NEXT: s_addc_u32 s3, s1, 0 966; GCN-NEXT: v_mov_b32_e32 v0, s2 967; GCN-NEXT: v_mov_b32_e32 v1, s3 968; GCN-NEXT: s_add_u32 s0, s0, 0x60 969; GCN-NEXT: flat_store_dwordx2 v[0:1], v[28:29] 970; GCN-NEXT: s_addc_u32 s1, s1, 0 971; GCN-NEXT: v_mov_b32_e32 v0, s0 972; GCN-NEXT: v_mov_b32_e32 v1, s1 973; GCN-NEXT: flat_store_dwordx4 v[0:1], v[24:27] 974; GCN-NEXT: s_endpgm 975entry: 976 %v = insertelement <15 x double> %vec, double 1.000000e+00, i32 %sel 977 store <15 x double> %v, <15 x double> addrspace(1)* %out 978 ret void 979} 980 981define amdgpu_kernel void @bit4_inselt(<4 x i1> addrspace(1)* %out, <4 x i1> %vec, i32 %sel) { 982; GCN-LABEL: bit4_inselt: 983; GCN: ; %bb.0: ; %entry 984; GCN-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 985; GCN-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 986; GCN-NEXT: s_mov_b32 s6, -1 987; GCN-NEXT: s_mov_b32 s7, 0xe80000 988; GCN-NEXT: s_add_u32 s4, s4, s3 989; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c 990; GCN-NEXT: s_addc_u32 s5, s5, 0 991; GCN-NEXT: v_mov_b32_e32 v0, 4 992; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 993; GCN-NEXT: s_waitcnt lgkmcnt(0) 994; GCN-NEXT: s_and_b32 s3, s3, 3 995; GCN-NEXT: v_mov_b32_e32 v1, s2 996; GCN-NEXT: v_lshrrev_b16_e64 v2, 1, s2 997; GCN-NEXT: v_lshrrev_b16_e64 v3, 2, s2 998; GCN-NEXT: v_lshrrev_b16_e64 v4, 3, s2 999; GCN-NEXT: v_or_b32_e32 v0, s3, v0 1000; GCN-NEXT: v_and_b32_e32 v2, 1, v2 1001; GCN-NEXT: v_and_b32_e32 v3, 3, v3 1002; GCN-NEXT: v_and_b32_e32 v4, 1, v4 1003; GCN-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:4 1004; GCN-NEXT: buffer_store_byte v4, off, s[4:7], 0 offset:7 1005; GCN-NEXT: buffer_store_byte v3, off, s[4:7], 0 offset:6 1006; GCN-NEXT: buffer_store_byte v2, off, s[4:7], 0 offset:5 1007; GCN-NEXT: v_mov_b32_e32 v1, 
1 1008; GCN-NEXT: buffer_store_byte v1, v0, s[4:7], 0 offen 1009; GCN-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 offset:4 1010; GCN-NEXT: buffer_load_ubyte v1, off, s[4:7], 0 offset:5 1011; GCN-NEXT: buffer_load_ubyte v2, off, s[4:7], 0 offset:6 1012; GCN-NEXT: buffer_load_ubyte v3, off, s[4:7], 0 offset:7 1013; GCN-NEXT: s_waitcnt vmcnt(3) 1014; GCN-NEXT: v_and_b32_e32 v0, 1, v0 1015; GCN-NEXT: s_waitcnt vmcnt(2) 1016; GCN-NEXT: v_and_b32_e32 v1, 1, v1 1017; GCN-NEXT: s_waitcnt vmcnt(1) 1018; GCN-NEXT: v_and_b32_e32 v2, 1, v2 1019; GCN-NEXT: v_lshlrev_b16_e32 v1, 1, v1 1020; GCN-NEXT: v_lshlrev_b16_e32 v2, 2, v2 1021; GCN-NEXT: v_or_b32_e32 v0, v0, v1 1022; GCN-NEXT: s_waitcnt vmcnt(0) 1023; GCN-NEXT: v_lshlrev_b16_e32 v3, 3, v3 1024; GCN-NEXT: v_or_b32_e32 v0, v0, v2 1025; GCN-NEXT: v_or_b32_e32 v0, v0, v3 1026; GCN-NEXT: v_and_b32_e32 v2, 15, v0 1027; GCN-NEXT: v_mov_b32_e32 v0, s0 1028; GCN-NEXT: v_mov_b32_e32 v1, s1 1029; GCN-NEXT: flat_store_byte v[0:1], v2 1030; GCN-NEXT: s_endpgm 1031entry: 1032 %v = insertelement <4 x i1> %vec, i1 1, i32 %sel 1033 store <4 x i1> %v, <4 x i1> addrspace(1)* %out 1034 ret void 1035} 1036 1037define amdgpu_kernel void @bit128_inselt(<128 x i1> addrspace(1)* %out, <128 x i1> %vec, i32 %sel) { 1038; GCN-LABEL: bit128_inselt: 1039; GCN: ; %bb.0: ; %entry 1040; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 1041; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 1042; GCN-NEXT: s_load_dword s0, s[0:1], 0x44 1043; GCN-NEXT: s_waitcnt lgkmcnt(0) 1044; GCN-NEXT: s_lshr_b32 s1, s4, 24 1045; GCN-NEXT: s_lshr_b32 s8, s4, 16 1046; GCN-NEXT: s_lshr_b32 s9, s4, 17 1047; GCN-NEXT: s_lshr_b32 s10, s4, 18 1048; GCN-NEXT: s_lshr_b32 s11, s4, 19 1049; GCN-NEXT: s_lshr_b32 s12, s4, 20 1050; GCN-NEXT: s_lshr_b32 s13, s4, 21 1051; GCN-NEXT: s_lshr_b32 s14, s4, 22 1052; GCN-NEXT: s_lshr_b32 s15, s4, 23 1053; GCN-NEXT: s_lshr_b32 s16, s5, 24 1054; GCN-NEXT: s_lshr_b32 s17, s5, 16 1055; GCN-NEXT: s_lshr_b32 s18, s5, 17 1056; GCN-NEXT: s_lshr_b32 s19, s5, 
18 1057; GCN-NEXT: s_lshr_b32 s20, s5, 19 1058; GCN-NEXT: s_lshr_b32 s21, s5, 20 1059; GCN-NEXT: s_lshr_b32 s22, s5, 21 1060; GCN-NEXT: s_lshr_b32 s23, s5, 22 1061; GCN-NEXT: s_lshr_b32 s24, s5, 23 1062; GCN-NEXT: s_lshr_b32 s25, s6, 24 1063; GCN-NEXT: s_lshr_b32 s26, s6, 16 1064; GCN-NEXT: s_lshr_b32 s27, s6, 17 1065; GCN-NEXT: s_lshr_b32 s28, s6, 18 1066; GCN-NEXT: s_lshr_b32 s29, s6, 19 1067; GCN-NEXT: s_lshr_b32 s30, s6, 20 1068; GCN-NEXT: s_lshr_b32 s31, s6, 21 1069; GCN-NEXT: s_lshr_b32 s33, s6, 22 1070; GCN-NEXT: s_lshr_b32 s34, s6, 23 1071; GCN-NEXT: s_lshr_b32 s35, s7, 24 1072; GCN-NEXT: s_lshr_b32 s36, s7, 16 1073; GCN-NEXT: s_lshr_b32 s37, s7, 17 1074; GCN-NEXT: s_lshr_b32 s38, s7, 18 1075; GCN-NEXT: s_lshr_b32 s39, s7, 19 1076; GCN-NEXT: s_lshr_b32 s40, s7, 20 1077; GCN-NEXT: s_lshr_b32 s41, s7, 21 1078; GCN-NEXT: s_lshr_b32 s42, s7, 22 1079; GCN-NEXT: s_lshr_b32 s43, s7, 23 1080; GCN-NEXT: s_cmpk_lg_i32 s0, 0x77 1081; GCN-NEXT: v_mov_b32_e32 v16, s43 1082; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1083; GCN-NEXT: s_cmpk_lg_i32 s0, 0x76 1084; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1085; GCN-NEXT: v_mov_b32_e32 v17, s42 1086; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1087; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1088; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1089; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1090; GCN-NEXT: s_cmpk_lg_i32 s0, 0x75 1091; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1092; GCN-NEXT: v_mov_b32_e32 v17, s41 1093; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1094; GCN-NEXT: s_cmpk_lg_i32 s0, 0x74 1095; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1096; GCN-NEXT: v_mov_b32_e32 v18, s40 1097; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1098; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1099; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1100; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1101; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1102; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1103; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1104; GCN-NEXT: s_cmpk_lg_i32 s0, 0x73 1105; GCN-NEXT: v_or_b32_e32 
v16, v17, v16 1106; GCN-NEXT: v_mov_b32_e32 v17, s39 1107; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1108; GCN-NEXT: s_cmpk_lg_i32 s0, 0x72 1109; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1110; GCN-NEXT: v_mov_b32_e32 v18, s38 1111; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1112; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1113; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1114; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1115; GCN-NEXT: s_cmpk_lg_i32 s0, 0x71 1116; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1117; GCN-NEXT: v_mov_b32_e32 v18, s37 1118; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1119; GCN-NEXT: s_cmpk_lg_i32 s0, 0x70 1120; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1121; GCN-NEXT: v_mov_b32_e32 v19, s36 1122; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1123; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1124; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1125; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1126; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1127; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1128; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1129; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1130; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 1131; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1132; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7f 1133; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1134; GCN-NEXT: v_lshrrev_b16_e64 v17, 7, s35 1135; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1136; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7e 1137; GCN-NEXT: v_lshrrev_b16_e64 v18, 6, s35 1138; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1139; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1140; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1141; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1142; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1143; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7d 1144; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1145; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s35 1146; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1147; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7c 1148; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s35 1149; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1150; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1151; GCN-NEXT: 
v_cndmask_b32_e32 v19, 1, v19, vcc 1152; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1153; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1154; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1155; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1156; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1157; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7b 1158; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1159; GCN-NEXT: v_lshrrev_b16_e64 v18, 3, s35 1160; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1161; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7a 1162; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s35 1163; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1164; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1165; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1166; GCN-NEXT: s_cmpk_lg_i32 s0, 0x78 1167; GCN-NEXT: v_mov_b32_e32 v14, s35 1168; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1169; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1170; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1171; GCN-NEXT: s_cmpk_lg_i32 s0, 0x79 1172; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1173; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s35 1174; GCN-NEXT: v_cndmask_b32_e32 v14, 1, v14, vcc 1175; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1176; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1177; GCN-NEXT: v_and_b32_e32 v14, 1, v14 1178; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1179; GCN-NEXT: v_or_b32_e32 v14, v14, v19 1180; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 1181; GCN-NEXT: v_and_b32_e32 v14, 3, v14 1182; GCN-NEXT: v_or_b32_e32 v14, v14, v18 1183; GCN-NEXT: v_lshlrev_b16_e32 v17, 4, v17 1184; GCN-NEXT: v_and_b32_e32 v14, 15, v14 1185; GCN-NEXT: v_or_b32_sdwa v14, v14, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1186; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6f 1187; GCN-NEXT: v_or_b32_sdwa v14, v16, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1188; GCN-NEXT: v_lshrrev_b16_e64 v16, 15, s7 1189; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1190; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6e 1191; GCN-NEXT: v_lshrrev_b16_e64 v17, 14, s7 1192; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1193; GCN-NEXT: s_cselect_b64 vcc, 
-1, 0 1194; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1195; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1196; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1197; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6d 1198; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1199; GCN-NEXT: v_lshrrev_b16_e64 v17, 13, s7 1200; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1201; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6c 1202; GCN-NEXT: v_lshrrev_b16_e64 v18, 12, s7 1203; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1204; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1205; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1206; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1207; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1208; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1209; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1210; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1211; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6b 1212; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1213; GCN-NEXT: v_lshrrev_b16_e64 v17, 11, s7 1214; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1215; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6a 1216; GCN-NEXT: v_lshrrev_b16_e64 v18, 10, s7 1217; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1218; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1219; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1220; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1221; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1222; GCN-NEXT: s_cmpk_lg_i32 s0, 0x69 1223; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1224; GCN-NEXT: v_lshrrev_b16_e64 v18, 9, s7 1225; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1226; GCN-NEXT: s_cmpk_lg_i32 s0, 0x68 1227; GCN-NEXT: v_lshrrev_b16_e64 v19, 8, s7 1228; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1229; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1230; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1231; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1232; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1233; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1234; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1235; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1236; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1237; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 1238; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1239; 
GCN-NEXT: s_cmpk_lg_i32 s0, 0x67 1240; GCN-NEXT: v_or_b32_sdwa v16, v17, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1241; GCN-NEXT: v_lshrrev_b16_e64 v17, 7, s7 1242; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1243; GCN-NEXT: s_cmpk_lg_i32 s0, 0x66 1244; GCN-NEXT: v_lshrrev_b16_e64 v18, 6, s7 1245; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1246; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1247; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1248; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1249; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1250; GCN-NEXT: s_cmpk_lg_i32 s0, 0x65 1251; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1252; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s7 1253; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1254; GCN-NEXT: s_cmpk_lg_i32 s0, 0x64 1255; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s7 1256; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1257; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1258; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1259; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1260; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1261; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1262; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1263; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1264; GCN-NEXT: s_cmpk_lg_i32 s0, 0x63 1265; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1266; GCN-NEXT: v_lshrrev_b16_e64 v18, 3, s7 1267; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1268; GCN-NEXT: s_cmpk_lg_i32 s0, 0x62 1269; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s7 1270; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1271; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1272; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1273; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1274; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1275; GCN-NEXT: s_cmpk_lg_i32 s0, 0x61 1276; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1277; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s7 1278; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1279; GCN-NEXT: s_cmpk_lg_i32 s0, 0x60 1280; GCN-NEXT: v_mov_b32_e32 v15, s7 1281; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1282; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1283; GCN-NEXT: 
v_cndmask_b32_e32 v15, 1, v15, vcc 1284; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1285; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1286; GCN-NEXT: v_or_b32_e32 v15, v15, v19 1287; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 1288; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1289; GCN-NEXT: v_or_b32_e32 v15, v15, v18 1290; GCN-NEXT: v_lshlrev_b16_e32 v17, 4, v17 1291; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1292; GCN-NEXT: v_or_b32_e32 v15, v15, v17 1293; GCN-NEXT: s_cmpk_lg_i32 s0, 0x57 1294; GCN-NEXT: v_or_b32_sdwa v15, v15, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1295; GCN-NEXT: v_mov_b32_e32 v16, s34 1296; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1297; GCN-NEXT: s_cmpk_lg_i32 s0, 0x56 1298; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1299; GCN-NEXT: v_mov_b32_e32 v17, s33 1300; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1301; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1302; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1303; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1304; GCN-NEXT: s_cmpk_lg_i32 s0, 0x55 1305; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1306; GCN-NEXT: v_mov_b32_e32 v17, s31 1307; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1308; GCN-NEXT: s_cmpk_lg_i32 s0, 0x54 1309; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1310; GCN-NEXT: v_mov_b32_e32 v18, s30 1311; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1312; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1313; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1314; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1315; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1316; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1317; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1318; GCN-NEXT: s_cmpk_lg_i32 s0, 0x53 1319; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1320; GCN-NEXT: v_mov_b32_e32 v17, s29 1321; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1322; GCN-NEXT: s_cmpk_lg_i32 s0, 0x52 1323; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1324; GCN-NEXT: v_mov_b32_e32 v18, s28 1325; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1326; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1327; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 
1328; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1329; GCN-NEXT: s_cmpk_lg_i32 s0, 0x51 1330; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1331; GCN-NEXT: v_mov_b32_e32 v18, s27 1332; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1333; GCN-NEXT: s_cmpk_lg_i32 s0, 0x50 1334; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1335; GCN-NEXT: v_mov_b32_e32 v19, s26 1336; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1337; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1338; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1339; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1340; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1341; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1342; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1343; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1344; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 1345; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1346; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5f 1347; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1348; GCN-NEXT: v_lshrrev_b16_e64 v17, 7, s25 1349; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1350; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5e 1351; GCN-NEXT: v_lshrrev_b16_e64 v18, 6, s25 1352; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1353; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1354; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1355; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1356; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1357; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5d 1358; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1359; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s25 1360; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1361; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5c 1362; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s25 1363; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1364; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1365; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1366; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1367; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1368; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1369; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1370; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1371; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5b 1372; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1373; GCN-NEXT: v_lshrrev_b16_e64 v18, 
3, s25 1374; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1375; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5a 1376; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s25 1377; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1378; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1379; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1380; GCN-NEXT: s_cmpk_lg_i32 s0, 0x58 1381; GCN-NEXT: v_mov_b32_e32 v3, s25 1382; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1383; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1384; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1385; GCN-NEXT: s_cmpk_lg_i32 s0, 0x59 1386; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1387; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s25 1388; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc 1389; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1390; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1391; GCN-NEXT: v_and_b32_e32 v3, 1, v3 1392; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1393; GCN-NEXT: v_or_b32_e32 v3, v3, v19 1394; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 1395; GCN-NEXT: v_and_b32_e32 v3, 3, v3 1396; GCN-NEXT: v_or_b32_e32 v3, v3, v18 1397; GCN-NEXT: v_lshlrev_b16_e32 v17, 4, v17 1398; GCN-NEXT: v_and_b32_e32 v3, 15, v3 1399; GCN-NEXT: v_or_b32_sdwa v3, v3, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1400; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4f 1401; GCN-NEXT: v_or_b32_sdwa v16, v16, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1402; GCN-NEXT: v_lshrrev_b16_e64 v3, 15, s6 1403; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1404; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4e 1405; GCN-NEXT: v_lshrrev_b16_e64 v17, 14, s6 1406; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc 1407; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1408; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1409; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1410; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1411; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4d 1412; GCN-NEXT: v_or_b32_e32 v3, v17, v3 1413; GCN-NEXT: v_lshrrev_b16_e64 v17, 13, s6 1414; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1415; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4c 1416; GCN-NEXT: v_lshrrev_b16_e64 v18, 12, 
s6 1417; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1418; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1419; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1420; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1421; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1422; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1423; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 1424; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1425; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4b 1426; GCN-NEXT: v_or_b32_e32 v3, v17, v3 1427; GCN-NEXT: v_lshrrev_b16_e64 v17, 11, s6 1428; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1429; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4a 1430; GCN-NEXT: v_lshrrev_b16_e64 v18, 10, s6 1431; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1432; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1433; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1434; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1435; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1436; GCN-NEXT: s_cmpk_lg_i32 s0, 0x49 1437; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1438; GCN-NEXT: v_lshrrev_b16_e64 v18, 9, s6 1439; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1440; GCN-NEXT: s_cmpk_lg_i32 s0, 0x48 1441; GCN-NEXT: v_lshrrev_b16_e64 v19, 8, s6 1442; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1443; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1444; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1445; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1446; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1447; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1448; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1449; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1450; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1451; GCN-NEXT: v_lshlrev_b16_e32 v3, 4, v3 1452; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1453; GCN-NEXT: s_cmpk_lg_i32 s0, 0x47 1454; GCN-NEXT: v_or_b32_sdwa v17, v17, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1455; GCN-NEXT: v_lshrrev_b16_e64 v3, 7, s6 1456; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1457; GCN-NEXT: s_cmpk_lg_i32 s0, 0x46 1458; GCN-NEXT: v_lshrrev_b16_e64 v18, 6, s6 1459; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc 1460; GCN-NEXT: s_cselect_b64 vcc, -1, 0 
1461; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1462; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1463; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1464; GCN-NEXT: s_cmpk_lg_i32 s0, 0x45 1465; GCN-NEXT: v_or_b32_e32 v3, v18, v3 1466; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s6 1467; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1468; GCN-NEXT: s_cmpk_lg_i32 s0, 0x44 1469; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s6 1470; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1471; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1472; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1473; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1474; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1475; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1476; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 1477; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1478; GCN-NEXT: s_cmpk_lg_i32 s0, 0x43 1479; GCN-NEXT: v_or_b32_e32 v18, v18, v3 1480; GCN-NEXT: v_lshrrev_b16_e64 v3, 3, s6 1481; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1482; GCN-NEXT: s_cmpk_lg_i32 s0, 0x42 1483; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s6 1484; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc 1485; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1486; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1487; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1488; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1489; GCN-NEXT: s_cmpk_lg_i32 s0, 0x41 1490; GCN-NEXT: v_or_b32_e32 v3, v19, v3 1491; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s6 1492; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1493; GCN-NEXT: s_cmp_lg_u32 s0, 64 1494; GCN-NEXT: v_mov_b32_e32 v2, s6 1495; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1496; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1497; GCN-NEXT: v_cndmask_b32_e32 v2, 1, v2, vcc 1498; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1499; GCN-NEXT: v_and_b32_e32 v2, 1, v2 1500; GCN-NEXT: v_or_b32_e32 v2, v2, v19 1501; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 1502; GCN-NEXT: v_and_b32_e32 v2, 3, v2 1503; GCN-NEXT: v_or_b32_e32 v2, v2, v3 1504; GCN-NEXT: v_or_b32_sdwa v3, v15, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1505; GCN-NEXT: v_lshlrev_b16_e32 
v14, 4, v18 1506; GCN-NEXT: v_and_b32_e32 v2, 15, v2 1507; GCN-NEXT: s_cmp_lg_u32 s0, 55 1508; GCN-NEXT: v_or_b32_e32 v2, v2, v14 1509; GCN-NEXT: v_mov_b32_e32 v14, s24 1510; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1511; GCN-NEXT: s_cmp_lg_u32 s0, 54 1512; GCN-NEXT: v_cndmask_b32_e32 v14, 1, v14, vcc 1513; GCN-NEXT: v_mov_b32_e32 v15, s23 1514; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1515; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1516; GCN-NEXT: v_lshlrev_b16_e32 v14, 1, v14 1517; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1518; GCN-NEXT: s_cmp_lg_u32 s0, 53 1519; GCN-NEXT: v_or_b32_sdwa v2, v2, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1520; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1521; GCN-NEXT: v_mov_b32_e32 v15, s22 1522; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1523; GCN-NEXT: s_cmp_lg_u32 s0, 52 1524; GCN-NEXT: v_or_b32_sdwa v2, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1525; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1526; GCN-NEXT: v_mov_b32_e32 v16, s21 1527; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1528; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1529; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1530; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1531; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1532; GCN-NEXT: v_lshlrev_b16_e32 v14, 2, v14 1533; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1534; GCN-NEXT: s_cmp_lg_u32 s0, 51 1535; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1536; GCN-NEXT: v_mov_b32_e32 v15, s20 1537; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1538; GCN-NEXT: s_cmp_lg_u32 s0, 50 1539; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1540; GCN-NEXT: v_mov_b32_e32 v16, s19 1541; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1542; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1543; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1544; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1545; GCN-NEXT: s_cmp_lg_u32 s0, 49 1546; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1547; GCN-NEXT: v_mov_b32_e32 v16, s18 1548; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1549; GCN-NEXT: s_cmp_lg_u32 s0, 48 1550; 
GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1551; GCN-NEXT: v_mov_b32_e32 v17, s17 1552; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1553; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1554; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1555; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1556; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1557; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1558; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1559; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1560; GCN-NEXT: v_lshlrev_b16_e32 v14, 4, v14 1561; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1562; GCN-NEXT: s_cmp_lg_u32 s0, 63 1563; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1564; GCN-NEXT: v_lshrrev_b16_e64 v15, 7, s16 1565; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1566; GCN-NEXT: s_cmp_lg_u32 s0, 62 1567; GCN-NEXT: v_lshrrev_b16_e64 v16, 6, s16 1568; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1569; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1570; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1571; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1572; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1573; GCN-NEXT: s_cmp_lg_u32 s0, 61 1574; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1575; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s16 1576; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1577; GCN-NEXT: s_cmp_lg_u32 s0, 60 1578; GCN-NEXT: v_lshrrev_b16_e64 v17, 4, s16 1579; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1580; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1581; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1582; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1583; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1584; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1585; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1586; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1587; GCN-NEXT: s_cmp_lg_u32 s0, 59 1588; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1589; GCN-NEXT: v_lshrrev_b16_e64 v16, 3, s16 1590; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1591; GCN-NEXT: s_cmp_lg_u32 s0, 58 1592; GCN-NEXT: v_lshrrev_b16_e64 v17, 2, s16 1593; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1594; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1595; GCN-NEXT: v_cndmask_b32_e32 v17, 1, 
v17, vcc 1596; GCN-NEXT: s_cmp_lg_u32 s0, 56 1597; GCN-NEXT: v_mov_b32_e32 v13, s16 1598; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1599; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1600; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1601; GCN-NEXT: s_cmp_lg_u32 s0, 57 1602; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1603; GCN-NEXT: v_lshrrev_b16_e64 v17, 1, s16 1604; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1605; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1606; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1607; GCN-NEXT: v_and_b32_e32 v13, 1, v13 1608; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1609; GCN-NEXT: v_or_b32_e32 v13, v13, v17 1610; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1611; GCN-NEXT: v_and_b32_e32 v13, 3, v13 1612; GCN-NEXT: v_or_b32_e32 v13, v13, v16 1613; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15 1614; GCN-NEXT: v_and_b32_e32 v13, 15, v13 1615; GCN-NEXT: v_or_b32_sdwa v13, v13, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1616; GCN-NEXT: s_cmp_lg_u32 s0, 47 1617; GCN-NEXT: v_or_b32_sdwa v14, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1618; GCN-NEXT: v_lshrrev_b16_e64 v13, 15, s5 1619; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1620; GCN-NEXT: s_cmp_lg_u32 s0, 46 1621; GCN-NEXT: v_lshrrev_b16_e64 v15, 14, s5 1622; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1623; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1624; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1625; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1626; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1627; GCN-NEXT: s_cmp_lg_u32 s0, 45 1628; GCN-NEXT: v_or_b32_e32 v13, v15, v13 1629; GCN-NEXT: v_lshrrev_b16_e64 v15, 13, s5 1630; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1631; GCN-NEXT: s_cmp_lg_u32 s0, 44 1632; GCN-NEXT: v_lshrrev_b16_e64 v16, 12, s5 1633; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1634; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1635; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1636; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1637; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1638; GCN-NEXT: v_or_b32_e32 
v15, v16, v15 1639; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13 1640; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1641; GCN-NEXT: s_cmp_lg_u32 s0, 43 1642; GCN-NEXT: v_or_b32_e32 v13, v15, v13 1643; GCN-NEXT: v_lshrrev_b16_e64 v15, 11, s5 1644; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1645; GCN-NEXT: s_cmp_lg_u32 s0, 42 1646; GCN-NEXT: v_lshrrev_b16_e64 v16, 10, s5 1647; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1648; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1649; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1650; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1651; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1652; GCN-NEXT: s_cmp_lg_u32 s0, 41 1653; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1654; GCN-NEXT: v_lshrrev_b16_e64 v16, 9, s5 1655; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1656; GCN-NEXT: s_cmp_lg_u32 s0, 40 1657; GCN-NEXT: v_lshrrev_b16_e64 v17, 8, s5 1658; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1659; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1660; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1661; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1662; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1663; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1664; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1665; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1666; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1667; GCN-NEXT: v_lshlrev_b16_e32 v13, 4, v13 1668; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1669; GCN-NEXT: s_cmp_lg_u32 s0, 39 1670; GCN-NEXT: v_or_b32_sdwa v15, v15, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1671; GCN-NEXT: v_lshrrev_b16_e64 v13, 7, s5 1672; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1673; GCN-NEXT: s_cmp_lg_u32 s0, 38 1674; GCN-NEXT: v_lshrrev_b16_e64 v16, 6, s5 1675; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1676; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1677; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1678; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1679; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1680; GCN-NEXT: s_cmp_lg_u32 s0, 37 1681; GCN-NEXT: v_or_b32_e32 v13, v16, v13 1682; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s5 1683; 
GCN-NEXT: s_cselect_b64 vcc, -1, 0 1684; GCN-NEXT: s_cmp_lg_u32 s0, 36 1685; GCN-NEXT: v_lshrrev_b16_e64 v17, 4, s5 1686; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1687; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1688; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1689; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1690; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1691; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1692; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13 1693; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1694; GCN-NEXT: s_cmp_lg_u32 s0, 35 1695; GCN-NEXT: v_or_b32_e32 v16, v16, v13 1696; GCN-NEXT: v_lshrrev_b16_e64 v13, 3, s5 1697; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1698; GCN-NEXT: s_cmp_lg_u32 s0, 34 1699; GCN-NEXT: v_lshrrev_b16_e64 v17, 2, s5 1700; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1701; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1702; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1703; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1704; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1705; GCN-NEXT: s_cmp_lg_u32 s0, 33 1706; GCN-NEXT: v_or_b32_e32 v17, v17, v13 1707; GCN-NEXT: v_lshrrev_b16_e64 v13, 1, s5 1708; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1709; GCN-NEXT: s_cmp_lg_u32 s0, 32 1710; GCN-NEXT: v_mov_b32_e32 v1, s5 1711; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1712; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1713; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 1714; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1715; GCN-NEXT: v_and_b32_e32 v1, 1, v1 1716; GCN-NEXT: v_or_b32_e32 v1, v1, v13 1717; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1718; GCN-NEXT: v_and_b32_e32 v1, 3, v1 1719; GCN-NEXT: v_or_b32_e32 v1, v1, v17 1720; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 1721; GCN-NEXT: v_and_b32_e32 v1, 15, v1 1722; GCN-NEXT: v_or_b32_e32 v1, v1, v16 1723; GCN-NEXT: v_or_b32_sdwa v1, v1, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1724; GCN-NEXT: s_cmp_lg_u32 s0, 23 1725; GCN-NEXT: v_or_b32_sdwa v1, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1726; GCN-NEXT: v_mov_b32_e32 
v14, s15 1727; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1728; GCN-NEXT: s_cmp_lg_u32 s0, 22 1729; GCN-NEXT: v_cndmask_b32_e32 v14, 1, v14, vcc 1730; GCN-NEXT: v_mov_b32_e32 v15, s14 1731; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1732; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1733; GCN-NEXT: v_lshlrev_b16_e32 v14, 1, v14 1734; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1735; GCN-NEXT: s_cmp_lg_u32 s0, 21 1736; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1737; GCN-NEXT: v_mov_b32_e32 v15, s13 1738; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1739; GCN-NEXT: s_cmp_lg_u32 s0, 20 1740; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1741; GCN-NEXT: v_mov_b32_e32 v16, s12 1742; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1743; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1744; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1745; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1746; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1747; GCN-NEXT: v_lshlrev_b16_e32 v14, 2, v14 1748; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1749; GCN-NEXT: s_cmp_lg_u32 s0, 19 1750; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1751; GCN-NEXT: v_mov_b32_e32 v15, s11 1752; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1753; GCN-NEXT: s_cmp_lg_u32 s0, 18 1754; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1755; GCN-NEXT: v_mov_b32_e32 v16, s10 1756; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1757; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1758; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1759; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1760; GCN-NEXT: s_cmp_lg_u32 s0, 17 1761; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1762; GCN-NEXT: v_mov_b32_e32 v16, s9 1763; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1764; GCN-NEXT: s_cmp_lg_u32 s0, 16 1765; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1766; GCN-NEXT: v_mov_b32_e32 v18, s8 1767; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1768; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1769; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1770; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1771; GCN-NEXT: v_or_b32_e32 v16, v18, v16 1772; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1773; GCN-NEXT: v_and_b32_e32 
v16, 3, v16 1774; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1775; GCN-NEXT: v_lshlrev_b16_e32 v14, 4, v14 1776; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1777; GCN-NEXT: s_cmp_lg_u32 s0, 31 1778; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1779; GCN-NEXT: v_lshrrev_b16_e64 v15, 7, s1 1780; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1781; GCN-NEXT: s_cmp_lg_u32 s0, 30 1782; GCN-NEXT: v_lshrrev_b16_e64 v16, 6, s1 1783; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1784; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1785; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1786; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1787; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1788; GCN-NEXT: s_cmp_lg_u32 s0, 29 1789; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1790; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s1 1791; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1792; GCN-NEXT: s_cmp_lg_u32 s0, 28 1793; GCN-NEXT: v_lshrrev_b16_e64 v18, 4, s1 1794; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1795; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1796; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1797; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1798; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1799; GCN-NEXT: v_or_b32_e32 v16, v18, v16 1800; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1801; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1802; GCN-NEXT: s_cmp_lg_u32 s0, 27 1803; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1804; GCN-NEXT: v_lshrrev_b16_e64 v16, 3, s1 1805; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1806; GCN-NEXT: s_cmp_lg_u32 s0, 26 1807; GCN-NEXT: v_lshrrev_b16_e64 v18, 2, s1 1808; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1809; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1810; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1811; GCN-NEXT: s_cmp_lg_u32 s0, 24 1812; GCN-NEXT: v_mov_b32_e32 v17, s1 1813; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1814; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1815; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1816; GCN-NEXT: s_cmp_lg_u32 s0, 25 1817; GCN-NEXT: v_or_b32_e32 v16, v18, v16 1818; GCN-NEXT: v_lshrrev_b16_e64 v18, 1, s1 1819; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 
1820; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1821; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1822; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1823; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1824; GCN-NEXT: v_or_b32_e32 v17, v17, v18 1825; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1826; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1827; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1828; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15 1829; GCN-NEXT: v_and_b32_e32 v16, 15, v16 1830; GCN-NEXT: v_or_b32_sdwa v15, v16, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1831; GCN-NEXT: s_cmp_lg_u32 s0, 15 1832; GCN-NEXT: v_or_b32_sdwa v14, v14, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1833; GCN-NEXT: v_lshrrev_b16_e64 v15, 15, s4 1834; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1835; GCN-NEXT: s_cmp_lg_u32 s0, 14 1836; GCN-NEXT: v_lshrrev_b16_e64 v16, 14, s4 1837; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1838; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1839; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1840; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1841; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1842; GCN-NEXT: s_cmp_lg_u32 s0, 13 1843; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1844; GCN-NEXT: v_lshrrev_b16_e64 v16, 13, s4 1845; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1846; GCN-NEXT: s_cmp_lg_u32 s0, 12 1847; GCN-NEXT: v_lshrrev_b16_e64 v17, 12, s4 1848; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1849; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1850; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1851; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1852; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1853; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1854; GCN-NEXT: s_cmp_lg_u32 s0, 11 1855; GCN-NEXT: v_lshrrev_b16_e64 v18, 11, s4 1856; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1857; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1858; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1859; GCN-NEXT: s_cmp_lg_u32 s0, 10 1860; GCN-NEXT: v_lshrrev_b16_e64 v13, 10, s4 1861; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1862; GCN-NEXT: v_cndmask_b32_e32 v16, 
1, v18, vcc 1863; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1864; GCN-NEXT: s_cmp_lg_u32 s0, 9 1865; GCN-NEXT: v_lshrrev_b16_e64 v12, 9, s4 1866; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1867; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1868; GCN-NEXT: s_cmp_lg_u32 s0, 8 1869; GCN-NEXT: v_lshrrev_b16_e64 v11, 8, s4 1870; GCN-NEXT: v_cndmask_b32_e32 v12, 1, v12, vcc 1871; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1872; GCN-NEXT: s_cmp_lg_u32 s0, 7 1873; GCN-NEXT: v_lshrrev_b16_e64 v10, 7, s4 1874; GCN-NEXT: v_cndmask_b32_e32 v11, 1, v11, vcc 1875; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1876; GCN-NEXT: s_cmp_lg_u32 s0, 6 1877; GCN-NEXT: v_lshrrev_b16_e64 v9, 6, s4 1878; GCN-NEXT: v_cndmask_b32_e32 v10, 1, v10, vcc 1879; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1880; GCN-NEXT: s_cmp_lg_u32 s0, 5 1881; GCN-NEXT: v_lshrrev_b16_e64 v8, 5, s4 1882; GCN-NEXT: v_cndmask_b32_e32 v9, 1, v9, vcc 1883; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1884; GCN-NEXT: s_cmp_lg_u32 s0, 4 1885; GCN-NEXT: v_lshrrev_b16_e64 v7, 4, s4 1886; GCN-NEXT: v_cndmask_b32_e32 v8, 1, v8, vcc 1887; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1888; GCN-NEXT: s_cmp_lg_u32 s0, 3 1889; GCN-NEXT: v_lshrrev_b16_e64 v6, 3, s4 1890; GCN-NEXT: v_cndmask_b32_e32 v7, 1, v7, vcc 1891; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1892; GCN-NEXT: s_cmp_lg_u32 s0, 2 1893; GCN-NEXT: v_lshrrev_b16_e64 v5, 2, s4 1894; GCN-NEXT: v_cndmask_b32_e32 v6, 1, v6, vcc 1895; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1896; GCN-NEXT: s_cmp_lg_u32 s0, 1 1897; GCN-NEXT: v_lshrrev_b16_e64 v4, 1, s4 1898; GCN-NEXT: v_cndmask_b32_e32 v5, 1, v5, vcc 1899; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1900; GCN-NEXT: s_cmp_lg_u32 s0, 0 1901; GCN-NEXT: v_mov_b32_e32 v0, s4 1902; GCN-NEXT: v_cndmask_b32_e32 v4, 1, v4, vcc 1903; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1904; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc 1905; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1906; GCN-NEXT: v_and_b32_e32 v13, 1, v13 1907; GCN-NEXT: v_lshlrev_b16_e32 v12, 1, v12 1908; GCN-NEXT: v_and_b32_e32 v11, 1, v11 1909; GCN-NEXT: 
v_lshlrev_b16_e32 v10, 1, v10
; GCN-NEXT: v_and_b32_e32 v9, 1, v9
; GCN-NEXT: v_lshlrev_b16_e32 v8, 1, v8
; GCN-NEXT: v_and_b32_e32 v7, 1, v7
; GCN-NEXT: v_lshlrev_b16_e32 v6, 1, v6
; GCN-NEXT: v_and_b32_e32 v5, 1, v5
; GCN-NEXT: v_lshlrev_b16_e32 v4, 1, v4
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
; GCN-NEXT: v_or_b32_e32 v13, v13, v16
; GCN-NEXT: v_or_b32_e32 v11, v11, v12
; GCN-NEXT: v_or_b32_e32 v9, v9, v10
; GCN-NEXT: v_or_b32_e32 v7, v7, v8
; GCN-NEXT: v_or_b32_e32 v5, v5, v6
; GCN-NEXT: v_or_b32_e32 v0, v0, v4
; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13
; GCN-NEXT: v_and_b32_e32 v11, 3, v11
; GCN-NEXT: v_lshlrev_b16_e32 v9, 2, v9
; GCN-NEXT: v_and_b32_e32 v7, 3, v7
; GCN-NEXT: v_lshlrev_b16_e32 v5, 2, v5
; GCN-NEXT: v_and_b32_e32 v0, 3, v0
; GCN-NEXT: v_or_b32_e32 v11, v11, v13
; GCN-NEXT: v_or_b32_e32 v7, v7, v9
; GCN-NEXT: v_or_b32_e32 v0, v0, v5
; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15
; GCN-NEXT: v_and_b32_e32 v11, 15, v11
; GCN-NEXT: v_lshlrev_b16_e32 v7, 4, v7
; GCN-NEXT: v_and_b32_e32 v0, 15, v0
; GCN-NEXT: v_or_b32_sdwa v11, v11, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GCN-NEXT: v_or_b32_e32 v0, v0, v7
; GCN-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT: v_mov_b32_e32 v5, s3
; GCN-NEXT: v_or_b32_sdwa v0, v0, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN-NEXT: v_mov_b32_e32 v4, s2
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
entry:
  %v = insertelement <128 x i1> %vec, i1 1, i32 %sel
  store <128 x i1> %v, <128 x i1> addrspace(1)* %out
  ret void
}

; Dynamic-index insertelement of float 1.0 into a <32 x float> that an
; amdgpu_ps function takes and returns by value.
; The expected code is a straight-line sequence: one v_cmp_ne_u32 per element
; number (0 through 31) against v32 (presumably the register holding %sel),
; then one v_cndmask_b32 per element that yields either 1.0 or the incoming
; value of that element. Nothing else is expected before the shader epilog.
define amdgpu_ps <32 x float> @float32_inselt_vec(<32 x float> %vec, i32 %sel) {
; GCN-LABEL: float32_inselt_vec:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 2, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], 3, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 4, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 5, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[8:9], 6, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[10:11], 7, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[12:13], 8, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[14:15], 9, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[16:17], 10, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[18:19], 11, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[20:21], 12, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[22:23], 13, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[24:25], 14, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[26:27], 15, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[28:29], 16, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[30:31], 17, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[34:35], 18, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[36:37], 19, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[38:39], 20, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[40:41], 21, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[42:43], 22, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[44:45], 23, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[46:47], 24, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[48:49], 25, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[50:51], 26, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[52:53], 27, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[54:55], 28, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[56:57], 29, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[58:59], 30, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[60:61], 31, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[62:63], 0, v32
; GCN-NEXT: v_cndmask_b32_e64 v0, 1.0, v0, s[62:63]
; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
; GCN-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1]
; GCN-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[2:3]
; GCN-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[4:5]
; GCN-NEXT: v_cndmask_b32_e64 v5, 1.0, v5, s[6:7]
; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, v6, s[8:9]
; GCN-NEXT: v_cndmask_b32_e64 v7, 1.0, v7, s[10:11]
; GCN-NEXT: v_cndmask_b32_e64 v8, 1.0, v8, s[12:13]
; GCN-NEXT: v_cndmask_b32_e64 v9, 1.0, v9, s[14:15]
; GCN-NEXT: v_cndmask_b32_e64 v10, 1.0, v10, s[16:17]
; GCN-NEXT: v_cndmask_b32_e64 v11, 1.0, v11, s[18:19]
; GCN-NEXT: v_cndmask_b32_e64 v12, 1.0, v12, s[20:21]
; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, v13, s[22:23]
; GCN-NEXT: v_cndmask_b32_e64 v14, 1.0, v14, s[24:25]
; GCN-NEXT: v_cndmask_b32_e64 v15, 1.0, v15, s[26:27]
; GCN-NEXT: v_cndmask_b32_e64 v16, 1.0, v16, s[28:29]
; GCN-NEXT: v_cndmask_b32_e64 v17, 1.0, v17, s[30:31]
; GCN-NEXT: v_cndmask_b32_e64 v18, 1.0, v18, s[34:35]
; GCN-NEXT: v_cndmask_b32_e64 v19, 1.0, v19, s[36:37]
; GCN-NEXT: v_cndmask_b32_e64 v20, 1.0, v20, s[38:39]
; GCN-NEXT: v_cndmask_b32_e64 v21, 1.0, v21, s[40:41]
; GCN-NEXT: v_cndmask_b32_e64 v22, 1.0, v22, s[42:43]
; GCN-NEXT: v_cndmask_b32_e64 v23, 1.0, v23, s[44:45]
; GCN-NEXT: v_cndmask_b32_e64 v24, 1.0, v24, s[46:47]
; GCN-NEXT: v_cndmask_b32_e64 v25, 1.0, v25, s[48:49]
; GCN-NEXT: v_cndmask_b32_e64 v26, 1.0, v26, s[50:51]
; GCN-NEXT: v_cndmask_b32_e64 v27, 1.0, v27, s[52:53]
; GCN-NEXT: v_cndmask_b32_e64 v28, 1.0, v28, s[54:55]
; GCN-NEXT: v_cndmask_b32_e64 v29, 1.0, v29, s[56:57]
; GCN-NEXT: v_cndmask_b32_e64 v30, 1.0, v30, s[58:59]
; GCN-NEXT: v_cndmask_b32_e64 v31, 1.0, v31, s[60:61]
; GCN-NEXT: ; return to shader part epilog
entry:
  %v = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel
  ret <32 x float> %v
}

; Dynamic-index insertelement of double 1.0 into an <8 x double> in a function
; with the default calling convention (it returns via s_setpc_b64 s[30:31]).
; For each element number 0 through 7 the expected code does a v_cmp_eq_u32
; against v16 (presumably the register holding %sel) and then a pair of
; v_cndmask_b32: on a match the even register of the pair becomes 0 and the
; odd register becomes v17, which is preloaded with 0x3ff00000. Together those
; two dwords are the bit pattern of double 1.0 (0x3FF0000000000000).
define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) {
; GCN-LABEL: double8_inselt_vec:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
; GCN-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
; GCN-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
; GCN-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
; GCN-NEXT: v_cndmask_b32_e64 v12, v12, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
; GCN-NEXT: v_cndmask_b32_e64 v14, v14, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel
  ret <8 x double> %v
}