1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s 3 4define amdgpu_kernel void @float4_inselt(<4 x float> addrspace(1)* %out, <4 x float> %vec, i32 %sel) { 5; GCN-LABEL: float4_inselt: 6; GCN: ; %bb.0: ; %entry 7; GCN-NEXT: s_load_dword s2, s[0:1], 0x44 8; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 9; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 10; GCN-NEXT: s_waitcnt lgkmcnt(0) 11; GCN-NEXT: s_cmp_lg_u32 s2, 3 12; GCN-NEXT: v_mov_b32_e32 v0, s7 13; GCN-NEXT: s_cselect_b64 vcc, -1, 0 14; GCN-NEXT: s_cmp_lg_u32 s2, 2 15; GCN-NEXT: v_cndmask_b32_e32 v3, 1.0, v0, vcc 16; GCN-NEXT: v_mov_b32_e32 v0, s6 17; GCN-NEXT: s_cselect_b64 vcc, -1, 0 18; GCN-NEXT: s_cmp_lg_u32 s2, 1 19; GCN-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc 20; GCN-NEXT: v_mov_b32_e32 v0, s5 21; GCN-NEXT: s_cselect_b64 vcc, -1, 0 22; GCN-NEXT: s_cmp_lg_u32 s2, 0 23; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v0, vcc 24; GCN-NEXT: v_mov_b32_e32 v0, s4 25; GCN-NEXT: s_cselect_b64 vcc, -1, 0 26; GCN-NEXT: v_mov_b32_e32 v5, s1 27; GCN-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc 28; GCN-NEXT: v_mov_b32_e32 v4, s0 29; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 30; GCN-NEXT: s_endpgm 31entry: 32 %v = insertelement <4 x float> %vec, float 1.000000e+00, i32 %sel 33 store <4 x float> %v, <4 x float> addrspace(1)* %out 34 ret void 35} 36 37define amdgpu_kernel void @float4_inselt_undef(<4 x float> addrspace(1)* %out, i32 %sel) { 38; GCN-LABEL: float4_inselt_undef: 39; GCN: ; %bb.0: ; %entry 40; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 41; GCN-NEXT: v_mov_b32_e32 v0, 1.0 42; GCN-NEXT: v_mov_b32_e32 v1, v0 43; GCN-NEXT: v_mov_b32_e32 v2, v0 44; GCN-NEXT: v_mov_b32_e32 v3, v0 45; GCN-NEXT: s_waitcnt lgkmcnt(0) 46; GCN-NEXT: v_mov_b32_e32 v5, s1 47; GCN-NEXT: v_mov_b32_e32 v4, s0 48; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 49; GCN-NEXT: s_endpgm 50entry: 51 %v = 
insertelement <4 x float> undef, float 1.000000e+00, i32 %sel 52 store <4 x float> %v, <4 x float> addrspace(1)* %out 53 ret void 54} 55 56define amdgpu_kernel void @int4_inselt(<4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %sel) { 57; GCN-LABEL: int4_inselt: 58; GCN: ; %bb.0: ; %entry 59; GCN-NEXT: s_load_dword s2, s[0:1], 0x44 60; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 61; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 62; GCN-NEXT: s_waitcnt lgkmcnt(0) 63; GCN-NEXT: s_cmp_lg_u32 s2, 3 64; GCN-NEXT: s_cselect_b32 s3, s7, 1 65; GCN-NEXT: s_cmp_lg_u32 s2, 2 66; GCN-NEXT: s_cselect_b32 s6, s6, 1 67; GCN-NEXT: s_cmp_lg_u32 s2, 1 68; GCN-NEXT: s_cselect_b32 s5, s5, 1 69; GCN-NEXT: s_cmp_lg_u32 s2, 0 70; GCN-NEXT: s_cselect_b32 s2, s4, 1 71; GCN-NEXT: v_mov_b32_e32 v5, s1 72; GCN-NEXT: v_mov_b32_e32 v0, s2 73; GCN-NEXT: v_mov_b32_e32 v1, s5 74; GCN-NEXT: v_mov_b32_e32 v2, s6 75; GCN-NEXT: v_mov_b32_e32 v3, s3 76; GCN-NEXT: v_mov_b32_e32 v4, s0 77; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 78; GCN-NEXT: s_endpgm 79entry: 80 %v = insertelement <4 x i32> %vec, i32 1, i32 %sel 81 store <4 x i32> %v, <4 x i32> addrspace(1)* %out 82 ret void 83} 84 85define amdgpu_kernel void @float2_inselt(<2 x float> addrspace(1)* %out, <2 x float> %vec, i32 %sel) { 86; GCN-LABEL: float2_inselt: 87; GCN: ; %bb.0: ; %entry 88; GCN-NEXT: s_load_dword s4, s[0:1], 0x34 89; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c 90; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 91; GCN-NEXT: s_waitcnt lgkmcnt(0) 92; GCN-NEXT: s_cmp_lg_u32 s4, 1 93; GCN-NEXT: v_mov_b32_e32 v0, s3 94; GCN-NEXT: s_cselect_b64 vcc, -1, 0 95; GCN-NEXT: s_cmp_lg_u32 s4, 0 96; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v0, vcc 97; GCN-NEXT: v_mov_b32_e32 v0, s2 98; GCN-NEXT: s_cselect_b64 vcc, -1, 0 99; GCN-NEXT: v_mov_b32_e32 v3, s1 100; GCN-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc 101; GCN-NEXT: v_mov_b32_e32 v2, s0 102; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 103; GCN-NEXT: s_endpgm 104entry: 105 %v = insertelement 
<2 x float> %vec, float 1.000000e+00, i32 %sel 106 store <2 x float> %v, <2 x float> addrspace(1)* %out 107 ret void 108} 109 110define amdgpu_kernel void @float8_inselt(<8 x float> addrspace(1)* %out, <8 x float> %vec, i32 %sel) { 111; GCN-LABEL: float8_inselt: 112; GCN: ; %bb.0: ; %entry 113; GCN-NEXT: s_load_dword s2, s[0:1], 0x64 114; GCN-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x44 115; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 116; GCN-NEXT: s_waitcnt lgkmcnt(0) 117; GCN-NEXT: s_cmp_lg_u32 s2, 3 118; GCN-NEXT: v_mov_b32_e32 v0, s7 119; GCN-NEXT: s_cselect_b64 vcc, -1, 0 120; GCN-NEXT: s_cmp_lg_u32 s2, 2 121; GCN-NEXT: v_cndmask_b32_e32 v3, 1.0, v0, vcc 122; GCN-NEXT: v_mov_b32_e32 v0, s6 123; GCN-NEXT: s_cselect_b64 vcc, -1, 0 124; GCN-NEXT: s_cmp_lg_u32 s2, 1 125; GCN-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc 126; GCN-NEXT: v_mov_b32_e32 v0, s5 127; GCN-NEXT: s_cselect_b64 vcc, -1, 0 128; GCN-NEXT: s_cmp_lg_u32 s2, 0 129; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v0, vcc 130; GCN-NEXT: v_mov_b32_e32 v0, s4 131; GCN-NEXT: s_cselect_b64 vcc, -1, 0 132; GCN-NEXT: s_cmp_lg_u32 s2, 7 133; GCN-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc 134; GCN-NEXT: v_mov_b32_e32 v4, s11 135; GCN-NEXT: s_cselect_b64 vcc, -1, 0 136; GCN-NEXT: s_cmp_lg_u32 s2, 6 137; GCN-NEXT: v_cndmask_b32_e32 v7, 1.0, v4, vcc 138; GCN-NEXT: v_mov_b32_e32 v4, s10 139; GCN-NEXT: s_cselect_b64 vcc, -1, 0 140; GCN-NEXT: s_cmp_lg_u32 s2, 5 141; GCN-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc 142; GCN-NEXT: v_mov_b32_e32 v4, s9 143; GCN-NEXT: s_cselect_b64 vcc, -1, 0 144; GCN-NEXT: s_cmp_lg_u32 s2, 4 145; GCN-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc 146; GCN-NEXT: s_cselect_b64 vcc, -1, 0 147; GCN-NEXT: s_add_u32 s2, s0, 16 148; GCN-NEXT: s_addc_u32 s3, s1, 0 149; GCN-NEXT: v_mov_b32_e32 v4, s8 150; GCN-NEXT: v_mov_b32_e32 v9, s3 151; GCN-NEXT: v_cndmask_b32_e32 v4, 1.0, v4, vcc 152; GCN-NEXT: v_mov_b32_e32 v8, s2 153; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 154; GCN-NEXT: s_nop 0 155; GCN-NEXT: 
v_mov_b32_e32 v5, s1 156; GCN-NEXT: v_mov_b32_e32 v4, s0 157; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 158; GCN-NEXT: s_endpgm 159entry: 160 %v = insertelement <8 x float> %vec, float 1.000000e+00, i32 %sel 161 store <8 x float> %v, <8 x float> addrspace(1)* %out 162 ret void 163} 164 165define amdgpu_kernel void @float16_inselt(<16 x float> addrspace(1)* %out, <16 x float> %vec, i32 %sel) { 166; GCN-LABEL: float16_inselt: 167; GCN: ; %bb.0: ; %entry 168; GCN-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 169; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 170; GCN-NEXT: s_load_dword s20, s[0:1], 0xa4 171; GCN-NEXT: s_waitcnt lgkmcnt(0) 172; GCN-NEXT: v_mov_b32_e32 v0, s4 173; GCN-NEXT: s_add_u32 s0, s2, 48 174; GCN-NEXT: s_addc_u32 s1, s3, 0 175; GCN-NEXT: v_mov_b32_e32 v17, s1 176; GCN-NEXT: v_mov_b32_e32 v1, s5 177; GCN-NEXT: v_mov_b32_e32 v2, s6 178; GCN-NEXT: v_mov_b32_e32 v3, s7 179; GCN-NEXT: v_mov_b32_e32 v4, s8 180; GCN-NEXT: v_mov_b32_e32 v5, s9 181; GCN-NEXT: v_mov_b32_e32 v6, s10 182; GCN-NEXT: v_mov_b32_e32 v7, s11 183; GCN-NEXT: v_mov_b32_e32 v8, s12 184; GCN-NEXT: v_mov_b32_e32 v9, s13 185; GCN-NEXT: v_mov_b32_e32 v10, s14 186; GCN-NEXT: v_mov_b32_e32 v11, s15 187; GCN-NEXT: v_mov_b32_e32 v12, s16 188; GCN-NEXT: v_mov_b32_e32 v13, s17 189; GCN-NEXT: v_mov_b32_e32 v14, s18 190; GCN-NEXT: v_mov_b32_e32 v15, s19 191; GCN-NEXT: s_mov_b32 m0, s20 192; GCN-NEXT: v_mov_b32_e32 v16, s0 193; GCN-NEXT: s_add_u32 s0, s2, 32 194; GCN-NEXT: v_movreld_b32_e32 v0, 1.0 195; GCN-NEXT: s_addc_u32 s1, s3, 0 196; GCN-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 197; GCN-NEXT: s_nop 0 198; GCN-NEXT: v_mov_b32_e32 v13, s1 199; GCN-NEXT: v_mov_b32_e32 v12, s0 200; GCN-NEXT: s_add_u32 s0, s2, 16 201; GCN-NEXT: s_addc_u32 s1, s3, 0 202; GCN-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 203; GCN-NEXT: s_nop 0 204; GCN-NEXT: v_mov_b32_e32 v9, s1 205; GCN-NEXT: v_mov_b32_e32 v8, s0 206; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 207; GCN-NEXT: s_nop 0 208; GCN-NEXT: v_mov_b32_e32 
v5, s3 209; GCN-NEXT: v_mov_b32_e32 v4, s2 210; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 211; GCN-NEXT: s_endpgm 212entry: 213 %v = insertelement <16 x float> %vec, float 1.000000e+00, i32 %sel 214 store <16 x float> %v, <16 x float> addrspace(1)* %out 215 ret void 216} 217 218define amdgpu_kernel void @float32_inselt(<32 x float> addrspace(1)* %out, <32 x float> %vec, i32 %sel) { 219; GCN-LABEL: float32_inselt: 220; GCN: ; %bb.0: ; %entry 221; GCN-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0xa4 222; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 223; GCN-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0xe4 224; GCN-NEXT: s_load_dword s0, s[0:1], 0x124 225; GCN-NEXT: s_waitcnt lgkmcnt(0) 226; GCN-NEXT: v_mov_b32_e32 v0, s36 227; GCN-NEXT: v_mov_b32_e32 v1, s37 228; GCN-NEXT: v_mov_b32_e32 v2, s38 229; GCN-NEXT: s_mov_b32 m0, s0 230; GCN-NEXT: s_add_u32 s0, s2, 0x70 231; GCN-NEXT: s_addc_u32 s1, s3, 0 232; GCN-NEXT: v_mov_b32_e32 v33, s1 233; GCN-NEXT: v_mov_b32_e32 v3, s39 234; GCN-NEXT: v_mov_b32_e32 v4, s40 235; GCN-NEXT: v_mov_b32_e32 v5, s41 236; GCN-NEXT: v_mov_b32_e32 v6, s42 237; GCN-NEXT: v_mov_b32_e32 v7, s43 238; GCN-NEXT: v_mov_b32_e32 v8, s44 239; GCN-NEXT: v_mov_b32_e32 v9, s45 240; GCN-NEXT: v_mov_b32_e32 v10, s46 241; GCN-NEXT: v_mov_b32_e32 v11, s47 242; GCN-NEXT: v_mov_b32_e32 v12, s48 243; GCN-NEXT: v_mov_b32_e32 v13, s49 244; GCN-NEXT: v_mov_b32_e32 v14, s50 245; GCN-NEXT: v_mov_b32_e32 v15, s51 246; GCN-NEXT: v_mov_b32_e32 v16, s4 247; GCN-NEXT: v_mov_b32_e32 v17, s5 248; GCN-NEXT: v_mov_b32_e32 v18, s6 249; GCN-NEXT: v_mov_b32_e32 v19, s7 250; GCN-NEXT: v_mov_b32_e32 v20, s8 251; GCN-NEXT: v_mov_b32_e32 v21, s9 252; GCN-NEXT: v_mov_b32_e32 v22, s10 253; GCN-NEXT: v_mov_b32_e32 v23, s11 254; GCN-NEXT: v_mov_b32_e32 v24, s12 255; GCN-NEXT: v_mov_b32_e32 v25, s13 256; GCN-NEXT: v_mov_b32_e32 v26, s14 257; GCN-NEXT: v_mov_b32_e32 v27, s15 258; GCN-NEXT: v_mov_b32_e32 v28, s16 259; GCN-NEXT: v_mov_b32_e32 v29, s17 260; GCN-NEXT: v_mov_b32_e32 v30, s18 261; 
GCN-NEXT: v_mov_b32_e32 v31, s19 262; GCN-NEXT: v_mov_b32_e32 v32, s0 263; GCN-NEXT: s_add_u32 s0, s2, 0x60 264; GCN-NEXT: v_movreld_b32_e32 v0, 1.0 265; GCN-NEXT: s_addc_u32 s1, s3, 0 266; GCN-NEXT: flat_store_dwordx4 v[32:33], v[28:31] 267; GCN-NEXT: s_nop 0 268; GCN-NEXT: v_mov_b32_e32 v29, s1 269; GCN-NEXT: v_mov_b32_e32 v28, s0 270; GCN-NEXT: s_add_u32 s0, s2, 0x50 271; GCN-NEXT: s_addc_u32 s1, s3, 0 272; GCN-NEXT: flat_store_dwordx4 v[28:29], v[24:27] 273; GCN-NEXT: s_nop 0 274; GCN-NEXT: v_mov_b32_e32 v25, s1 275; GCN-NEXT: v_mov_b32_e32 v24, s0 276; GCN-NEXT: s_add_u32 s0, s2, 64 277; GCN-NEXT: s_addc_u32 s1, s3, 0 278; GCN-NEXT: flat_store_dwordx4 v[24:25], v[20:23] 279; GCN-NEXT: s_nop 0 280; GCN-NEXT: v_mov_b32_e32 v21, s1 281; GCN-NEXT: v_mov_b32_e32 v20, s0 282; GCN-NEXT: s_add_u32 s0, s2, 48 283; GCN-NEXT: s_addc_u32 s1, s3, 0 284; GCN-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 285; GCN-NEXT: s_nop 0 286; GCN-NEXT: v_mov_b32_e32 v17, s1 287; GCN-NEXT: v_mov_b32_e32 v16, s0 288; GCN-NEXT: s_add_u32 s0, s2, 32 289; GCN-NEXT: s_addc_u32 s1, s3, 0 290; GCN-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 291; GCN-NEXT: s_nop 0 292; GCN-NEXT: v_mov_b32_e32 v13, s1 293; GCN-NEXT: v_mov_b32_e32 v12, s0 294; GCN-NEXT: s_add_u32 s0, s2, 16 295; GCN-NEXT: s_addc_u32 s1, s3, 0 296; GCN-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 297; GCN-NEXT: s_nop 0 298; GCN-NEXT: v_mov_b32_e32 v9, s1 299; GCN-NEXT: v_mov_b32_e32 v8, s0 300; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 301; GCN-NEXT: s_nop 0 302; GCN-NEXT: v_mov_b32_e32 v5, s3 303; GCN-NEXT: v_mov_b32_e32 v4, s2 304; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 305; GCN-NEXT: s_endpgm 306entry: 307 %v = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel 308 store <32 x float> %v, <32 x float> addrspace(1)* %out 309 ret void 310} 311 312define amdgpu_kernel void @half4_inselt(<4 x half> addrspace(1)* %out, <4 x half> %vec, i32 %sel) { 313; GCN-LABEL: half4_inselt: 314; GCN: ; %bb.0: ; %entry 315; GCN-NEXT: 
s_load_dword s6, s[0:1], 0x34 316; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 317; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 318; GCN-NEXT: s_mov_b64 s[4:5], 0xffff 319; GCN-NEXT: s_waitcnt lgkmcnt(0) 320; GCN-NEXT: s_lshl_b32 s6, s6, 4 321; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], s6 322; GCN-NEXT: s_mov_b32 s6, 0x3c003c00 323; GCN-NEXT: s_mov_b32 s7, s6 324; GCN-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 325; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 326; GCN-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 327; GCN-NEXT: v_mov_b32_e32 v0, s2 328; GCN-NEXT: v_mov_b32_e32 v3, s1 329; GCN-NEXT: v_mov_b32_e32 v1, s3 330; GCN-NEXT: v_mov_b32_e32 v2, s0 331; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 332; GCN-NEXT: s_endpgm 333entry: 334 %v = insertelement <4 x half> %vec, half 1.000000e+00, i32 %sel 335 store <4 x half> %v, <4 x half> addrspace(1)* %out 336 ret void 337} 338 339define amdgpu_kernel void @half2_inselt(<2 x half> addrspace(1)* %out, <2 x half> %vec, i32 %sel) { 340; GCN-LABEL: half2_inselt: 341; GCN: ; %bb.0: ; %entry 342; GCN-NEXT: s_load_dword s2, s[0:1], 0x30 343; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c 344; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 345; GCN-NEXT: s_waitcnt lgkmcnt(0) 346; GCN-NEXT: s_lshl_b32 s2, s2, 4 347; GCN-NEXT: s_lshl_b32 s2, 0xffff, s2 348; GCN-NEXT: s_andn2_b32 s3, s3, s2 349; GCN-NEXT: s_and_b32 s2, s2, 0x3c003c00 350; GCN-NEXT: s_or_b32 s2, s2, s3 351; GCN-NEXT: v_mov_b32_e32 v0, s0 352; GCN-NEXT: v_mov_b32_e32 v1, s1 353; GCN-NEXT: v_mov_b32_e32 v2, s2 354; GCN-NEXT: flat_store_dword v[0:1], v2 355; GCN-NEXT: s_endpgm 356entry: 357 %v = insertelement <2 x half> %vec, half 1.000000e+00, i32 %sel 358 store <2 x half> %v, <2 x half> addrspace(1)* %out 359 ret void 360} 361 362define amdgpu_kernel void @half8_inselt(<8 x half> addrspace(1)* %out, <8 x half> %vec, i32 %sel) { 363; GCN-LABEL: half8_inselt: 364; GCN: ; %bb.0: ; %entry 365; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 366; GCN-NEXT: s_load_dword s2, s[0:1], 0x44 
367; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 368; GCN-NEXT: v_mov_b32_e32 v0, 0x3c00 369; GCN-NEXT: s_waitcnt lgkmcnt(0) 370; GCN-NEXT: s_lshr_b32 s3, s7, 16 371; GCN-NEXT: s_cmp_lg_u32 s2, 7 372; GCN-NEXT: v_mov_b32_e32 v1, s3 373; GCN-NEXT: s_cselect_b64 vcc, -1, 0 374; GCN-NEXT: s_cmp_lg_u32 s2, 6 375; GCN-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 376; GCN-NEXT: v_mov_b32_e32 v2, s7 377; GCN-NEXT: s_cselect_b64 vcc, -1, 0 378; GCN-NEXT: s_lshr_b32 s3, s6, 16 379; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 380; GCN-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 381; GCN-NEXT: s_cmp_lg_u32 s2, 5 382; GCN-NEXT: v_or_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 383; GCN-NEXT: v_mov_b32_e32 v1, s3 384; GCN-NEXT: s_cselect_b64 vcc, -1, 0 385; GCN-NEXT: s_cmp_lg_u32 s2, 4 386; GCN-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 387; GCN-NEXT: v_mov_b32_e32 v2, s6 388; GCN-NEXT: s_cselect_b64 vcc, -1, 0 389; GCN-NEXT: s_lshr_b32 s3, s5, 16 390; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 391; GCN-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 392; GCN-NEXT: s_cmp_lg_u32 s2, 3 393; GCN-NEXT: v_or_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 394; GCN-NEXT: v_mov_b32_e32 v1, s3 395; GCN-NEXT: s_cselect_b64 vcc, -1, 0 396; GCN-NEXT: s_cmp_lg_u32 s2, 2 397; GCN-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 398; GCN-NEXT: v_mov_b32_e32 v4, s5 399; GCN-NEXT: s_cselect_b64 vcc, -1, 0 400; GCN-NEXT: s_lshr_b32 s3, s4, 16 401; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 402; GCN-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 403; GCN-NEXT: s_cmp_lg_u32 s2, 1 404; GCN-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 405; GCN-NEXT: v_mov_b32_e32 v4, s3 406; GCN-NEXT: s_cselect_b64 vcc, -1, 0 407; GCN-NEXT: s_cmp_lg_u32 s2, 0 408; GCN-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 409; GCN-NEXT: v_mov_b32_e32 v5, s4 410; GCN-NEXT: s_cselect_b64 vcc, -1, 0 411; GCN-NEXT: v_lshlrev_b32_e32 v4, 16, v4 412; 
GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 413; GCN-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 414; GCN-NEXT: v_mov_b32_e32 v5, s1 415; GCN-NEXT: v_mov_b32_e32 v4, s0 416; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 417; GCN-NEXT: s_endpgm 418entry: 419 %v = insertelement <8 x half> %vec, half 1.000000e+00, i32 %sel 420 store <8 x half> %v, <8 x half> addrspace(1)* %out 421 ret void 422} 423 424define amdgpu_kernel void @short2_inselt(<2 x i16> addrspace(1)* %out, <2 x i16> %vec, i32 %sel) { 425; GCN-LABEL: short2_inselt: 426; GCN: ; %bb.0: ; %entry 427; GCN-NEXT: s_load_dword s2, s[0:1], 0x30 428; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c 429; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 430; GCN-NEXT: s_waitcnt lgkmcnt(0) 431; GCN-NEXT: s_lshl_b32 s2, s2, 4 432; GCN-NEXT: s_lshl_b32 s2, 0xffff, s2 433; GCN-NEXT: s_andn2_b32 s3, s3, s2 434; GCN-NEXT: s_and_b32 s2, s2, 0x10001 435; GCN-NEXT: s_or_b32 s2, s2, s3 436; GCN-NEXT: v_mov_b32_e32 v0, s0 437; GCN-NEXT: v_mov_b32_e32 v1, s1 438; GCN-NEXT: v_mov_b32_e32 v2, s2 439; GCN-NEXT: flat_store_dword v[0:1], v2 440; GCN-NEXT: s_endpgm 441entry: 442 %v = insertelement <2 x i16> %vec, i16 1, i32 %sel 443 store <2 x i16> %v, <2 x i16> addrspace(1)* %out 444 ret void 445} 446 447define amdgpu_kernel void @short4_inselt(<4 x i16> addrspace(1)* %out, <4 x i16> %vec, i32 %sel) { 448; GCN-LABEL: short4_inselt: 449; GCN: ; %bb.0: ; %entry 450; GCN-NEXT: s_load_dword s6, s[0:1], 0x34 451; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 452; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 453; GCN-NEXT: s_mov_b64 s[4:5], 0xffff 454; GCN-NEXT: s_waitcnt lgkmcnt(0) 455; GCN-NEXT: s_lshl_b32 s6, s6, 4 456; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], s6 457; GCN-NEXT: s_mov_b32 s6, 0x10001 458; GCN-NEXT: s_mov_b32 s7, s6 459; GCN-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 460; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 461; GCN-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 462; GCN-NEXT: 
v_mov_b32_e32 v0, s2 463; GCN-NEXT: v_mov_b32_e32 v3, s1 464; GCN-NEXT: v_mov_b32_e32 v1, s3 465; GCN-NEXT: v_mov_b32_e32 v2, s0 466; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 467; GCN-NEXT: s_endpgm 468entry: 469 %v = insertelement <4 x i16> %vec, i16 1, i32 %sel 470 store <4 x i16> %v, <4 x i16> addrspace(1)* %out 471 ret void 472} 473 474define amdgpu_kernel void @byte8_inselt(<8 x i8> addrspace(1)* %out, <8 x i8> %vec, i32 %sel) { 475; GCN-LABEL: byte8_inselt: 476; GCN: ; %bb.0: ; %entry 477; GCN-NEXT: s_load_dword s6, s[0:1], 0x34 478; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 479; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 480; GCN-NEXT: s_mov_b64 s[4:5], 0xffff 481; GCN-NEXT: s_waitcnt lgkmcnt(0) 482; GCN-NEXT: s_lshl_b32 s6, s6, 3 483; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], s6 484; GCN-NEXT: s_and_b32 s7, s5, 0x1010101 485; GCN-NEXT: s_and_b32 s6, s4, 0x1010101 486; GCN-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 487; GCN-NEXT: s_or_b64 s[0:1], s[6:7], s[0:1] 488; GCN-NEXT: v_mov_b32_e32 v2, s2 489; GCN-NEXT: v_mov_b32_e32 v0, s0 490; GCN-NEXT: v_mov_b32_e32 v1, s1 491; GCN-NEXT: v_mov_b32_e32 v3, s3 492; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 493; GCN-NEXT: s_endpgm 494entry: 495 %v = insertelement <8 x i8> %vec, i8 1, i32 %sel 496 store <8 x i8> %v, <8 x i8> addrspace(1)* %out 497 ret void 498} 499 500define amdgpu_kernel void @byte16_inselt(<16 x i8> addrspace(1)* %out, <16 x i8> %vec, i32 %sel) { 501; GCN-LABEL: byte16_inselt: 502; GCN: ; %bb.0: ; %entry 503; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 504; GCN-NEXT: s_load_dword s2, s[0:1], 0x44 505; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 506; GCN-NEXT: s_waitcnt lgkmcnt(0) 507; GCN-NEXT: s_lshr_b32 s3, s7, 24 508; GCN-NEXT: s_cmp_lg_u32 s2, 15 509; GCN-NEXT: v_mov_b32_e32 v0, s3 510; GCN-NEXT: s_cselect_b64 vcc, -1, 0 511; GCN-NEXT: s_lshr_b32 s3, s7, 16 512; GCN-NEXT: s_cmp_lg_u32 s2, 14 513; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc 514; GCN-NEXT: v_mov_b32_e32 v1, s3 515; 
GCN-NEXT: s_cselect_b64 vcc, -1, 0 516; GCN-NEXT: s_lshr_b32 s3, s7, 8 517; GCN-NEXT: v_lshlrev_b16_e32 v0, 8, v0 518; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 519; GCN-NEXT: s_cmp_lg_u32 s2, 13 520; GCN-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 521; GCN-NEXT: v_mov_b32_e32 v1, s3 522; GCN-NEXT: s_cselect_b64 vcc, -1, 0 523; GCN-NEXT: s_cmp_lg_u32 s2, 12 524; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 525; GCN-NEXT: v_mov_b32_e32 v2, s7 526; GCN-NEXT: s_cselect_b64 vcc, -1, 0 527; GCN-NEXT: v_lshlrev_b16_e32 v1, 8, v1 528; GCN-NEXT: v_cndmask_b32_e32 v2, 1, v2, vcc 529; GCN-NEXT: s_lshr_b32 s3, s6, 24 530; GCN-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 531; GCN-NEXT: s_cmp_lg_u32 s2, 11 532; GCN-NEXT: v_or_b32_sdwa v3, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 533; GCN-NEXT: v_mov_b32_e32 v0, s3 534; GCN-NEXT: s_cselect_b64 vcc, -1, 0 535; GCN-NEXT: s_lshr_b32 s3, s6, 16 536; GCN-NEXT: s_cmp_lg_u32 s2, 10 537; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc 538; GCN-NEXT: v_mov_b32_e32 v1, s3 539; GCN-NEXT: s_cselect_b64 vcc, -1, 0 540; GCN-NEXT: s_lshr_b32 s3, s6, 8 541; GCN-NEXT: v_lshlrev_b16_e32 v0, 8, v0 542; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 543; GCN-NEXT: s_cmp_lg_u32 s2, 9 544; GCN-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 545; GCN-NEXT: v_mov_b32_e32 v1, s3 546; GCN-NEXT: s_cselect_b64 vcc, -1, 0 547; GCN-NEXT: s_cmp_lg_u32 s2, 8 548; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 549; GCN-NEXT: v_mov_b32_e32 v2, s6 550; GCN-NEXT: s_cselect_b64 vcc, -1, 0 551; GCN-NEXT: v_lshlrev_b16_e32 v1, 8, v1 552; GCN-NEXT: v_cndmask_b32_e32 v2, 1, v2, vcc 553; GCN-NEXT: s_lshr_b32 s3, s5, 24 554; GCN-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 555; GCN-NEXT: s_cmp_lg_u32 s2, 7 556; GCN-NEXT: v_or_b32_sdwa v2, v1, 
v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 557; GCN-NEXT: v_mov_b32_e32 v0, s3 558; GCN-NEXT: s_cselect_b64 vcc, -1, 0 559; GCN-NEXT: s_lshr_b32 s3, s5, 16 560; GCN-NEXT: s_cmp_lg_u32 s2, 6 561; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc 562; GCN-NEXT: v_mov_b32_e32 v1, s3 563; GCN-NEXT: s_cselect_b64 vcc, -1, 0 564; GCN-NEXT: s_lshr_b32 s3, s5, 8 565; GCN-NEXT: v_lshlrev_b16_e32 v0, 8, v0 566; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 567; GCN-NEXT: s_cmp_lg_u32 s2, 5 568; GCN-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 569; GCN-NEXT: v_mov_b32_e32 v1, s3 570; GCN-NEXT: s_cselect_b64 vcc, -1, 0 571; GCN-NEXT: s_cmp_lg_u32 s2, 4 572; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 573; GCN-NEXT: v_mov_b32_e32 v4, s5 574; GCN-NEXT: s_cselect_b64 vcc, -1, 0 575; GCN-NEXT: v_lshlrev_b16_e32 v1, 8, v1 576; GCN-NEXT: v_cndmask_b32_e32 v4, 1, v4, vcc 577; GCN-NEXT: s_lshr_b32 s3, s4, 24 578; GCN-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 579; GCN-NEXT: s_cmp_lg_u32 s2, 3 580; GCN-NEXT: v_or_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 581; GCN-NEXT: v_mov_b32_e32 v0, s3 582; GCN-NEXT: s_cselect_b64 vcc, -1, 0 583; GCN-NEXT: s_lshr_b32 s3, s4, 16 584; GCN-NEXT: s_cmp_lg_u32 s2, 2 585; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc 586; GCN-NEXT: v_mov_b32_e32 v4, s3 587; GCN-NEXT: s_cselect_b64 vcc, -1, 0 588; GCN-NEXT: s_lshr_b32 s3, s4, 8 589; GCN-NEXT: v_lshlrev_b16_e32 v0, 8, v0 590; GCN-NEXT: v_cndmask_b32_e32 v4, 1, v4, vcc 591; GCN-NEXT: s_cmp_lg_u32 s2, 1 592; GCN-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 593; GCN-NEXT: v_mov_b32_e32 v4, s3 594; GCN-NEXT: s_cselect_b64 vcc, -1, 0 595; GCN-NEXT: s_cmp_lg_u32 s2, 0 596; GCN-NEXT: v_cndmask_b32_e32 v4, 1, v4, vcc 597; GCN-NEXT: v_mov_b32_e32 v5, s4 598; GCN-NEXT: s_cselect_b64 vcc, -1, 0 599; 
GCN-NEXT: v_lshlrev_b16_e32 v4, 8, v4 600; GCN-NEXT: v_cndmask_b32_e32 v5, 1, v5, vcc 601; GCN-NEXT: v_or_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 602; GCN-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 603; GCN-NEXT: v_mov_b32_e32 v5, s1 604; GCN-NEXT: v_mov_b32_e32 v4, s0 605; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 606; GCN-NEXT: s_endpgm 607entry: 608 %v = insertelement <16 x i8> %vec, i8 1, i32 %sel 609 store <16 x i8> %v, <16 x i8> addrspace(1)* %out 610 ret void 611} 612 613define amdgpu_kernel void @double2_inselt(<2 x double> addrspace(1)* %out, <2 x double> %vec, i32 %sel) { 614; GCN-LABEL: double2_inselt: 615; GCN: ; %bb.0: ; %entry 616; GCN-NEXT: s_load_dword s2, s[0:1], 0x44 617; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 618; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 619; GCN-NEXT: v_mov_b32_e32 v0, 0x3ff00000 620; GCN-NEXT: s_waitcnt lgkmcnt(0) 621; GCN-NEXT: s_cmp_eq_u32 s2, 1 622; GCN-NEXT: v_mov_b32_e32 v1, s7 623; GCN-NEXT: s_cselect_b64 vcc, -1, 0 624; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc 625; GCN-NEXT: v_mov_b32_e32 v1, s6 626; GCN-NEXT: s_cmp_eq_u32 s2, 0 627; GCN-NEXT: v_cndmask_b32_e64 v2, v1, 0, vcc 628; GCN-NEXT: v_mov_b32_e32 v1, s5 629; GCN-NEXT: s_cselect_b64 vcc, -1, 0 630; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 631; GCN-NEXT: v_mov_b32_e32 v0, s4 632; GCN-NEXT: v_mov_b32_e32 v5, s1 633; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 634; GCN-NEXT: v_mov_b32_e32 v4, s0 635; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 636; GCN-NEXT: s_endpgm 637entry: 638 %v = insertelement <2 x double> %vec, double 1.000000e+00, i32 %sel 639 store <2 x double> %v, <2 x double> addrspace(1)* %out 640 ret void 641} 642 643define amdgpu_kernel void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x double> %vec, i32 %sel) { 644; GCN-LABEL: double5_inselt: 645; GCN: ; %bb.0: ; %entry 646; GCN-NEXT: s_load_dword s12, s[0:1], 0xa4 647; 
GCN-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x84 648; GCN-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x24 649; GCN-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x64 650; GCN-NEXT: v_mov_b32_e32 v4, 0x3ff00000 651; GCN-NEXT: s_waitcnt lgkmcnt(0) 652; GCN-NEXT: s_cmp_eq_u32 s12, 4 653; GCN-NEXT: v_mov_b32_e32 v0, s9 654; GCN-NEXT: s_cselect_b64 vcc, -1, 0 655; GCN-NEXT: v_cndmask_b32_e32 v9, v0, v4, vcc 656; GCN-NEXT: v_mov_b32_e32 v0, s8 657; GCN-NEXT: s_cmp_eq_u32 s12, 1 658; GCN-NEXT: v_cndmask_b32_e64 v8, v0, 0, vcc 659; GCN-NEXT: v_mov_b32_e32 v0, s3 660; GCN-NEXT: s_cselect_b64 vcc, -1, 0 661; GCN-NEXT: v_cndmask_b32_e32 v3, v0, v4, vcc 662; GCN-NEXT: v_mov_b32_e32 v0, s2 663; GCN-NEXT: s_cmp_eq_u32 s12, 0 664; GCN-NEXT: v_cndmask_b32_e64 v2, v0, 0, vcc 665; GCN-NEXT: v_mov_b32_e32 v0, s1 666; GCN-NEXT: s_cselect_b64 vcc, -1, 0 667; GCN-NEXT: v_cndmask_b32_e32 v1, v0, v4, vcc 668; GCN-NEXT: v_mov_b32_e32 v0, s0 669; GCN-NEXT: s_cmp_eq_u32 s12, 3 670; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 671; GCN-NEXT: v_mov_b32_e32 v5, s7 672; GCN-NEXT: s_cselect_b64 vcc, -1, 0 673; GCN-NEXT: v_cndmask_b32_e32 v7, v5, v4, vcc 674; GCN-NEXT: v_mov_b32_e32 v5, s6 675; GCN-NEXT: s_cmp_eq_u32 s12, 2 676; GCN-NEXT: v_cndmask_b32_e64 v6, v5, 0, vcc 677; GCN-NEXT: s_cselect_b64 vcc, -1, 0 678; GCN-NEXT: s_add_u32 s0, s10, 16 679; GCN-NEXT: v_mov_b32_e32 v5, s5 680; GCN-NEXT: s_addc_u32 s1, s11, 0 681; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc 682; GCN-NEXT: v_mov_b32_e32 v4, s4 683; GCN-NEXT: v_mov_b32_e32 v11, s1 684; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc 685; GCN-NEXT: v_mov_b32_e32 v10, s0 686; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 687; GCN-NEXT: s_add_u32 s0, s10, 32 688; GCN-NEXT: v_mov_b32_e32 v4, s10 689; GCN-NEXT: v_mov_b32_e32 v5, s11 690; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 691; GCN-NEXT: s_addc_u32 s1, s11, 0 692; GCN-NEXT: v_mov_b32_e32 v0, s0 693; GCN-NEXT: v_mov_b32_e32 v1, s1 694; GCN-NEXT: flat_store_dwordx2 v[0:1], v[8:9] 695; GCN-NEXT: s_endpgm 
696entry: 697 %v = insertelement <5 x double> %vec, double 1.000000e+00, i32 %sel 698 store <5 x double> %v, <5 x double> addrspace(1)* %out 699 ret void 700} 701 702define amdgpu_kernel void @double8_inselt(<8 x double> addrspace(1)* %out, <8 x double> %vec, i32 %sel) { 703; GCN-LABEL: double8_inselt: 704; GCN: ; %bb.0: ; %entry 705; GCN-NEXT: s_load_dword s2, s[0:1], 0xa4 706; GCN-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 707; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 708; GCN-NEXT: v_mov_b32_e32 v16, 0x3ff00000 709; GCN-NEXT: s_waitcnt lgkmcnt(0) 710; GCN-NEXT: s_lshl_b32 s2, s2, 1 711; GCN-NEXT: v_mov_b32_e32 v0, s4 712; GCN-NEXT: v_mov_b32_e32 v1, s5 713; GCN-NEXT: v_mov_b32_e32 v2, s6 714; GCN-NEXT: v_mov_b32_e32 v3, s7 715; GCN-NEXT: v_mov_b32_e32 v4, s8 716; GCN-NEXT: v_mov_b32_e32 v5, s9 717; GCN-NEXT: v_mov_b32_e32 v6, s10 718; GCN-NEXT: v_mov_b32_e32 v7, s11 719; GCN-NEXT: v_mov_b32_e32 v8, s12 720; GCN-NEXT: v_mov_b32_e32 v9, s13 721; GCN-NEXT: v_mov_b32_e32 v10, s14 722; GCN-NEXT: v_mov_b32_e32 v11, s15 723; GCN-NEXT: v_mov_b32_e32 v12, s16 724; GCN-NEXT: v_mov_b32_e32 v13, s17 725; GCN-NEXT: v_mov_b32_e32 v14, s18 726; GCN-NEXT: v_mov_b32_e32 v15, s19 727; GCN-NEXT: s_mov_b32 m0, s2 728; GCN-NEXT: s_add_u32 s2, s0, 48 729; GCN-NEXT: v_movreld_b32_e32 v0, 0 730; GCN-NEXT: s_addc_u32 s3, s1, 0 731; GCN-NEXT: v_movreld_b32_e32 v1, v16 732; GCN-NEXT: v_mov_b32_e32 v17, s3 733; GCN-NEXT: v_mov_b32_e32 v16, s2 734; GCN-NEXT: s_add_u32 s2, s0, 32 735; GCN-NEXT: s_addc_u32 s3, s1, 0 736; GCN-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 737; GCN-NEXT: s_nop 0 738; GCN-NEXT: v_mov_b32_e32 v13, s3 739; GCN-NEXT: v_mov_b32_e32 v12, s2 740; GCN-NEXT: s_add_u32 s2, s0, 16 741; GCN-NEXT: s_addc_u32 s3, s1, 0 742; GCN-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 743; GCN-NEXT: s_nop 0 744; GCN-NEXT: v_mov_b32_e32 v9, s3 745; GCN-NEXT: v_mov_b32_e32 v8, s2 746; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 747; GCN-NEXT: s_nop 0 748; GCN-NEXT: v_mov_b32_e32 v5, s1 
749; GCN-NEXT: v_mov_b32_e32 v4, s0 750; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 751; GCN-NEXT: s_endpgm 752entry: 753 %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel 754 store <8 x double> %v, <8 x double> addrspace(1)* %out 755 ret void 756} 757 758define amdgpu_kernel void @double7_inselt(<7 x double> addrspace(1)* %out, <7 x double> %vec, i32 %sel) { 759; GCN-LABEL: double7_inselt: 760; GCN: ; %bb.0: ; %entry 761; GCN-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x64 762; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 763; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x94 764; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x84 765; GCN-NEXT: s_load_dword s0, s[0:1], 0xa4 766; GCN-NEXT: s_waitcnt lgkmcnt(0) 767; GCN-NEXT: v_mov_b32_e32 v0, s4 768; GCN-NEXT: v_mov_b32_e32 v1, s5 769; GCN-NEXT: v_mov_b32_e32 v2, s6 770; GCN-NEXT: v_mov_b32_e32 v3, s7 771; GCN-NEXT: s_lshl_b32 s0, s0, 1 772; GCN-NEXT: v_mov_b32_e32 v4, s8 773; GCN-NEXT: v_mov_b32_e32 v5, s9 774; GCN-NEXT: v_mov_b32_e32 v6, s10 775; GCN-NEXT: v_mov_b32_e32 v7, s11 776; GCN-NEXT: v_mov_b32_e32 v8, s12 777; GCN-NEXT: v_mov_b32_e32 v9, s13 778; GCN-NEXT: v_mov_b32_e32 v10, s14 779; GCN-NEXT: v_mov_b32_e32 v11, s15 780; GCN-NEXT: v_mov_b32_e32 v12, s16 781; GCN-NEXT: v_mov_b32_e32 v13, s17 782; GCN-NEXT: s_mov_b32 m0, s0 783; GCN-NEXT: v_movreld_b32_e32 v0, 0 784; GCN-NEXT: v_mov_b32_e32 v16, 0x3ff00000 785; GCN-NEXT: s_add_u32 s0, s2, 16 786; GCN-NEXT: v_movreld_b32_e32 v1, v16 787; GCN-NEXT: s_addc_u32 s1, s3, 0 788; GCN-NEXT: v_mov_b32_e32 v15, s1 789; GCN-NEXT: v_mov_b32_e32 v14, s0 790; GCN-NEXT: flat_store_dwordx4 v[14:15], v[4:7] 791; GCN-NEXT: s_add_u32 s0, s2, 48 792; GCN-NEXT: v_mov_b32_e32 v5, s3 793; GCN-NEXT: v_mov_b32_e32 v4, s2 794; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 795; GCN-NEXT: s_addc_u32 s1, s3, 0 796; GCN-NEXT: v_mov_b32_e32 v0, s0 797; GCN-NEXT: v_mov_b32_e32 v1, s1 798; GCN-NEXT: s_add_u32 s0, s2, 32 799; GCN-NEXT: flat_store_dwordx2 v[0:1], v[12:13] 800; 
GCN-NEXT: s_addc_u32 s1, s3, 0 801; GCN-NEXT: v_mov_b32_e32 v0, s0 802; GCN-NEXT: v_mov_b32_e32 v1, s1 803; GCN-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 804; GCN-NEXT: s_endpgm 805entry: 806 %v = insertelement <7 x double> %vec, double 1.000000e+00, i32 %sel 807 store <7 x double> %v, <7 x double> addrspace(1)* %out 808 ret void 809} 810 811define amdgpu_kernel void @double16_inselt(<16 x double> addrspace(1)* %out, <16 x double> %vec, i32 %sel) { 812; GCN-LABEL: double16_inselt: 813; GCN: ; %bb.0: ; %entry 814; GCN-NEXT: s_load_dword s2, s[0:1], 0x124 815; GCN-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0xa4 816; GCN-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0xe4 817; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 818; GCN-NEXT: v_mov_b32_e32 v32, 0x3ff00000 819; GCN-NEXT: s_waitcnt lgkmcnt(0) 820; GCN-NEXT: v_mov_b32_e32 v0, s36 821; GCN-NEXT: s_lshl_b32 s2, s2, 1 822; GCN-NEXT: v_mov_b32_e32 v1, s37 823; GCN-NEXT: v_mov_b32_e32 v2, s38 824; GCN-NEXT: v_mov_b32_e32 v3, s39 825; GCN-NEXT: v_mov_b32_e32 v4, s40 826; GCN-NEXT: v_mov_b32_e32 v5, s41 827; GCN-NEXT: v_mov_b32_e32 v6, s42 828; GCN-NEXT: v_mov_b32_e32 v7, s43 829; GCN-NEXT: v_mov_b32_e32 v8, s44 830; GCN-NEXT: v_mov_b32_e32 v9, s45 831; GCN-NEXT: v_mov_b32_e32 v10, s46 832; GCN-NEXT: v_mov_b32_e32 v11, s47 833; GCN-NEXT: v_mov_b32_e32 v12, s48 834; GCN-NEXT: v_mov_b32_e32 v13, s49 835; GCN-NEXT: v_mov_b32_e32 v14, s50 836; GCN-NEXT: v_mov_b32_e32 v15, s51 837; GCN-NEXT: v_mov_b32_e32 v16, s4 838; GCN-NEXT: v_mov_b32_e32 v17, s5 839; GCN-NEXT: v_mov_b32_e32 v18, s6 840; GCN-NEXT: v_mov_b32_e32 v19, s7 841; GCN-NEXT: v_mov_b32_e32 v20, s8 842; GCN-NEXT: v_mov_b32_e32 v21, s9 843; GCN-NEXT: v_mov_b32_e32 v22, s10 844; GCN-NEXT: v_mov_b32_e32 v23, s11 845; GCN-NEXT: v_mov_b32_e32 v24, s12 846; GCN-NEXT: v_mov_b32_e32 v25, s13 847; GCN-NEXT: v_mov_b32_e32 v26, s14 848; GCN-NEXT: v_mov_b32_e32 v27, s15 849; GCN-NEXT: v_mov_b32_e32 v28, s16 850; GCN-NEXT: v_mov_b32_e32 v29, s17 851; GCN-NEXT: v_mov_b32_e32 v30, s18 
852; GCN-NEXT: v_mov_b32_e32 v31, s19 853; GCN-NEXT: s_mov_b32 m0, s2 854; GCN-NEXT: s_add_u32 s2, s0, 0x70 855; GCN-NEXT: v_movreld_b32_e32 v0, 0 856; GCN-NEXT: s_addc_u32 s3, s1, 0 857; GCN-NEXT: v_movreld_b32_e32 v1, v32 858; GCN-NEXT: v_mov_b32_e32 v33, s3 859; GCN-NEXT: v_mov_b32_e32 v32, s2 860; GCN-NEXT: s_add_u32 s2, s0, 0x60 861; GCN-NEXT: s_addc_u32 s3, s1, 0 862; GCN-NEXT: flat_store_dwordx4 v[32:33], v[28:31] 863; GCN-NEXT: s_nop 0 864; GCN-NEXT: v_mov_b32_e32 v29, s3 865; GCN-NEXT: v_mov_b32_e32 v28, s2 866; GCN-NEXT: s_add_u32 s2, s0, 0x50 867; GCN-NEXT: s_addc_u32 s3, s1, 0 868; GCN-NEXT: flat_store_dwordx4 v[28:29], v[24:27] 869; GCN-NEXT: s_nop 0 870; GCN-NEXT: v_mov_b32_e32 v25, s3 871; GCN-NEXT: v_mov_b32_e32 v24, s2 872; GCN-NEXT: s_add_u32 s2, s0, 64 873; GCN-NEXT: s_addc_u32 s3, s1, 0 874; GCN-NEXT: flat_store_dwordx4 v[24:25], v[20:23] 875; GCN-NEXT: s_nop 0 876; GCN-NEXT: v_mov_b32_e32 v21, s3 877; GCN-NEXT: v_mov_b32_e32 v20, s2 878; GCN-NEXT: s_add_u32 s2, s0, 48 879; GCN-NEXT: s_addc_u32 s3, s1, 0 880; GCN-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 881; GCN-NEXT: s_nop 0 882; GCN-NEXT: v_mov_b32_e32 v17, s3 883; GCN-NEXT: v_mov_b32_e32 v16, s2 884; GCN-NEXT: s_add_u32 s2, s0, 32 885; GCN-NEXT: s_addc_u32 s3, s1, 0 886; GCN-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 887; GCN-NEXT: s_nop 0 888; GCN-NEXT: v_mov_b32_e32 v13, s3 889; GCN-NEXT: v_mov_b32_e32 v12, s2 890; GCN-NEXT: s_add_u32 s2, s0, 16 891; GCN-NEXT: s_addc_u32 s3, s1, 0 892; GCN-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 893; GCN-NEXT: s_nop 0 894; GCN-NEXT: v_mov_b32_e32 v9, s3 895; GCN-NEXT: v_mov_b32_e32 v8, s2 896; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 897; GCN-NEXT: s_nop 0 898; GCN-NEXT: v_mov_b32_e32 v5, s1 899; GCN-NEXT: v_mov_b32_e32 v4, s0 900; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 901; GCN-NEXT: s_endpgm 902entry: 903 %v = insertelement <16 x double> %vec, double 1.000000e+00, i32 %sel 904 store <16 x double> %v, <16 x double> addrspace(1)* %out 905 ret 
void 906} 907
; double15_inselt: inserts double 1.0 into a <15 x double> kernel argument at
; the run-time index %sel and stores the whole vector to %out.
; What the expected fiji code below shows:
;  - the 30 dwords of %vec are copied from SGPRs into v0..v29;
;  - m0 is set to the loaded index shifted left by 1 (s_lshl_b32 ..., 1),
;    presumably because each double occupies two dwords;
;  - two v_movreld_b32 writes place the element: 0 for the low dword and
;    v32 = 0x3ff00000 for the high dword, the two halves of 1.0;
;  - the 120-byte result is written with seven dwordx4 stores plus one
;    dwordx2 store at offset 0x70.
; The assertion lines are autogenerated (see the header of this file);
; regenerate them with the script rather than editing them by hand.
908define amdgpu_kernel void @double15_inselt(<15 x double> addrspace(1)* %out, <15 x double> %vec, i32 %sel) { 909; GCN-LABEL: double15_inselt: 910; GCN: ; %bb.0: ; %entry 911; GCN-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0xa4 912; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x114 913; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x104 914; GCN-NEXT: s_load_dwordx8 s[24:31], s[0:1], 0xe4 915; GCN-NEXT: v_mov_b32_e32 v32, 0x3ff00000 916; GCN-NEXT: s_waitcnt lgkmcnt(0) 917; GCN-NEXT: v_mov_b32_e32 v0, s8 918; GCN-NEXT: v_mov_b32_e32 v28, s2 919; GCN-NEXT: v_mov_b32_e32 v24, s4 920; GCN-NEXT: s_load_dword s4, s[0:1], 0x124 921; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 922; GCN-NEXT: v_mov_b32_e32 v1, s9 923; GCN-NEXT: v_mov_b32_e32 v2, s10 924; GCN-NEXT: v_mov_b32_e32 v3, s11 925; GCN-NEXT: s_waitcnt lgkmcnt(0) 926; GCN-NEXT: s_lshl_b32 s2, s4, 1 927; GCN-NEXT: v_mov_b32_e32 v4, s12 928; GCN-NEXT: v_mov_b32_e32 v5, s13 929; GCN-NEXT: v_mov_b32_e32 v6, s14 930; GCN-NEXT: v_mov_b32_e32 v7, s15 931; GCN-NEXT: v_mov_b32_e32 v8, s16 932; GCN-NEXT: v_mov_b32_e32 v9, s17 933; GCN-NEXT: v_mov_b32_e32 v10, s18 934; GCN-NEXT: v_mov_b32_e32 v11, s19 935; GCN-NEXT: v_mov_b32_e32 v12, s20 936; GCN-NEXT: v_mov_b32_e32 v13, s21 937; GCN-NEXT: v_mov_b32_e32 v14, s22 938; GCN-NEXT: v_mov_b32_e32 v15, s23 939; GCN-NEXT: v_mov_b32_e32 v16, s24 940; GCN-NEXT: v_mov_b32_e32 v17, s25 941; GCN-NEXT: v_mov_b32_e32 v18, s26 942; GCN-NEXT: v_mov_b32_e32 v19, s27 943; GCN-NEXT: v_mov_b32_e32 v20, s28 944; GCN-NEXT: v_mov_b32_e32 v21, s29 945; GCN-NEXT: v_mov_b32_e32 v22, s30 946; GCN-NEXT: v_mov_b32_e32 v23, s31 947; GCN-NEXT: v_mov_b32_e32 v25, s5 948; GCN-NEXT: v_mov_b32_e32 v26, s6 949; GCN-NEXT: v_mov_b32_e32 v27, s7 950; GCN-NEXT: v_mov_b32_e32 v29, s3 951; GCN-NEXT: s_mov_b32 m0, s2 952; GCN-NEXT: v_movreld_b32_e32 v0, 0 953; GCN-NEXT: s_add_u32 s2, s0, 0x50 954; GCN-NEXT: v_movreld_b32_e32 v1, v32 955; GCN-NEXT: s_addc_u32 s3, s1, 0 956; GCN-NEXT: v_mov_b32_e32 v31, s3 957; 
GCN-NEXT: v_mov_b32_e32 v30, s2 958; GCN-NEXT: s_add_u32 s2, s0, 64 959; GCN-NEXT: s_addc_u32 s3, s1, 0 960; GCN-NEXT: flat_store_dwordx4 v[30:31], v[20:23] 961; GCN-NEXT: s_nop 0 962; GCN-NEXT: v_mov_b32_e32 v21, s3 963; GCN-NEXT: v_mov_b32_e32 v20, s2 964; GCN-NEXT: s_add_u32 s2, s0, 48 965; GCN-NEXT: s_addc_u32 s3, s1, 0 966; GCN-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 967; GCN-NEXT: s_nop 0 968; GCN-NEXT: v_mov_b32_e32 v17, s3 969; GCN-NEXT: v_mov_b32_e32 v16, s2 970; GCN-NEXT: s_add_u32 s2, s0, 32 971; GCN-NEXT: s_addc_u32 s3, s1, 0 972; GCN-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 973; GCN-NEXT: s_nop 0 974; GCN-NEXT: v_mov_b32_e32 v13, s3 975; GCN-NEXT: v_mov_b32_e32 v12, s2 976; GCN-NEXT: s_add_u32 s2, s0, 16 977; GCN-NEXT: s_addc_u32 s3, s1, 0 978; GCN-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 979; GCN-NEXT: s_nop 0 980; GCN-NEXT: v_mov_b32_e32 v9, s3 981; GCN-NEXT: v_mov_b32_e32 v8, s2 982; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 983; GCN-NEXT: s_add_u32 s2, s0, 0x70 984; GCN-NEXT: v_mov_b32_e32 v5, s1 985; GCN-NEXT: v_mov_b32_e32 v4, s0 986; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 987; GCN-NEXT: s_addc_u32 s3, s1, 0 988; GCN-NEXT: v_mov_b32_e32 v0, s2 989; GCN-NEXT: v_mov_b32_e32 v1, s3 990; GCN-NEXT: s_add_u32 s0, s0, 0x60 991; GCN-NEXT: flat_store_dwordx2 v[0:1], v[28:29] 992; GCN-NEXT: s_addc_u32 s1, s1, 0 993; GCN-NEXT: v_mov_b32_e32 v0, s0 994; GCN-NEXT: v_mov_b32_e32 v1, s1 995; GCN-NEXT: flat_store_dwordx4 v[0:1], v[24:27] 996; GCN-NEXT: s_endpgm 997entry: 998 %v = insertelement <15 x double> %vec, double 1.000000e+00, i32 %sel 999 store <15 x double> %v, <15 x double> addrspace(1)* %out 1000 ret void 1001} 1002 1003define amdgpu_kernel void @bit4_inselt(<4 x i1> addrspace(1)* %out, <4 x i1> %vec, i32 %sel) { 1004; GCN-LABEL: bit4_inselt: 1005; GCN: ; %bb.0: ; %entry 1006; GCN-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 1007; GCN-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 1008; GCN-NEXT: s_mov_b32 s6, -1 1009; GCN-NEXT: s_mov_b32 s7, 
0xe80000 1010; GCN-NEXT: s_add_u32 s4, s4, s3 1011; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c 1012; GCN-NEXT: s_addc_u32 s5, s5, 0 1013; GCN-NEXT: v_mov_b32_e32 v0, 4 1014; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1015; GCN-NEXT: s_waitcnt lgkmcnt(0) 1016; GCN-NEXT: s_and_b32 s3, s3, 3 1017; GCN-NEXT: v_mov_b32_e32 v1, s2 1018; GCN-NEXT: v_lshrrev_b16_e64 v2, 1, s2 1019; GCN-NEXT: v_lshrrev_b16_e64 v3, 2, s2 1020; GCN-NEXT: v_lshrrev_b16_e64 v4, 3, s2 1021; GCN-NEXT: v_or_b32_e32 v0, s3, v0 1022; GCN-NEXT: v_and_b32_e32 v2, 1, v2 1023; GCN-NEXT: v_and_b32_e32 v3, 3, v3 1024; GCN-NEXT: v_and_b32_e32 v4, 1, v4 1025; GCN-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:4 1026; GCN-NEXT: buffer_store_byte v4, off, s[4:7], 0 offset:7 1027; GCN-NEXT: buffer_store_byte v3, off, s[4:7], 0 offset:6 1028; GCN-NEXT: buffer_store_byte v2, off, s[4:7], 0 offset:5 1029; GCN-NEXT: v_mov_b32_e32 v1, 1 1030; GCN-NEXT: buffer_store_byte v1, v0, s[4:7], 0 offen 1031; GCN-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 offset:4 1032; GCN-NEXT: buffer_load_ubyte v1, off, s[4:7], 0 offset:5 1033; GCN-NEXT: buffer_load_ubyte v2, off, s[4:7], 0 offset:6 1034; GCN-NEXT: buffer_load_ubyte v3, off, s[4:7], 0 offset:7 1035; GCN-NEXT: s_waitcnt vmcnt(3) 1036; GCN-NEXT: v_and_b32_e32 v0, 1, v0 1037; GCN-NEXT: s_waitcnt vmcnt(2) 1038; GCN-NEXT: v_and_b32_e32 v1, 1, v1 1039; GCN-NEXT: s_waitcnt vmcnt(1) 1040; GCN-NEXT: v_and_b32_e32 v2, 1, v2 1041; GCN-NEXT: v_lshlrev_b16_e32 v1, 1, v1 1042; GCN-NEXT: v_lshlrev_b16_e32 v2, 2, v2 1043; GCN-NEXT: v_or_b32_e32 v0, v0, v1 1044; GCN-NEXT: s_waitcnt vmcnt(0) 1045; GCN-NEXT: v_lshlrev_b16_e32 v3, 3, v3 1046; GCN-NEXT: v_or_b32_e32 v0, v0, v2 1047; GCN-NEXT: v_or_b32_e32 v0, v0, v3 1048; GCN-NEXT: v_and_b32_e32 v2, 15, v0 1049; GCN-NEXT: v_mov_b32_e32 v0, s0 1050; GCN-NEXT: v_mov_b32_e32 v1, s1 1051; GCN-NEXT: flat_store_byte v[0:1], v2 1052; GCN-NEXT: s_endpgm 1053entry: 1054 %v = insertelement <4 x i1> %vec, i1 1, i32 %sel 1055 store <4 x i1> %v, 
<4 x i1> addrspace(1)* %out
; bit4_inselt, summary of the expected fiji code above: the <4 x i1> vector
; is not updated in registers. Its four bits are written to scratch as one
; byte each (offsets 4..7), the constant 1 is then stored at byte offset
; (s3 & 3) | 4 with an offen store (s3 is presumably %sel -- confirm against
; the kernarg layout), and the four bytes are loaded back and repacked into
; the low 4 bits of the byte that flat_store_byte writes to %out.
; The assertion lines are autogenerated; regenerate them with the script
; rather than editing them by hand.
1056 ret void 1057} 1058 1059define amdgpu_kernel void @bit128_inselt(<128 x i1> addrspace(1)* %out, <128 x i1> %vec, i32 %sel) { 1060; GCN-LABEL: bit128_inselt: 1061; GCN: ; %bb.0: ; %entry 1062; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 1063; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 1064; GCN-NEXT: s_load_dword s0, s[0:1], 0x44 1065; GCN-NEXT: s_waitcnt lgkmcnt(0) 1066; GCN-NEXT: s_lshr_b32 s1, s4, 24 1067; GCN-NEXT: s_lshr_b32 s8, s4, 16 1068; GCN-NEXT: s_lshr_b32 s9, s4, 17 1069; GCN-NEXT: s_lshr_b32 s10, s4, 18 1070; GCN-NEXT: s_lshr_b32 s11, s4, 19 1071; GCN-NEXT: s_lshr_b32 s12, s4, 20 1072; GCN-NEXT: s_lshr_b32 s13, s4, 21 1073; GCN-NEXT: s_lshr_b32 s14, s4, 22 1074; GCN-NEXT: s_lshr_b32 s15, s4, 23 1075; GCN-NEXT: s_lshr_b32 s16, s5, 24 1076; GCN-NEXT: s_lshr_b32 s17, s5, 16 1077; GCN-NEXT: s_lshr_b32 s18, s5, 17 1078; GCN-NEXT: s_lshr_b32 s19, s5, 18 1079; GCN-NEXT: s_lshr_b32 s20, s5, 19 1080; GCN-NEXT: s_lshr_b32 s21, s5, 20 1081; GCN-NEXT: s_lshr_b32 s22, s5, 21 1082; GCN-NEXT: s_lshr_b32 s23, s5, 22 1083; GCN-NEXT: s_lshr_b32 s24, s5, 23 1084; GCN-NEXT: s_lshr_b32 s25, s6, 24 1085; GCN-NEXT: s_lshr_b32 s26, s6, 16 1086; GCN-NEXT: s_lshr_b32 s27, s6, 17 1087; GCN-NEXT: s_lshr_b32 s28, s6, 18 1088; GCN-NEXT: s_lshr_b32 s29, s6, 19 1089; GCN-NEXT: s_lshr_b32 s30, s6, 20 1090; GCN-NEXT: s_lshr_b32 s31, s6, 21 1091; GCN-NEXT: s_lshr_b32 s33, s6, 22 1092; GCN-NEXT: s_lshr_b32 s34, s6, 23 1093; GCN-NEXT: s_lshr_b32 s35, s7, 24 1094; GCN-NEXT: s_lshr_b32 s36, s7, 16 1095; GCN-NEXT: s_lshr_b32 s37, s7, 17 1096; GCN-NEXT: s_lshr_b32 s38, s7, 18 1097; GCN-NEXT: s_lshr_b32 s39, s7, 19 1098; GCN-NEXT: s_lshr_b32 s40, s7, 20 1099; GCN-NEXT: s_lshr_b32 s41, s7, 21 1100; GCN-NEXT: s_lshr_b32 s42, s7, 22 1101; GCN-NEXT: s_lshr_b32 s43, s7, 23 1102; GCN-NEXT: s_cmpk_lg_i32 s0, 0x77 1103; GCN-NEXT: v_mov_b32_e32 v16, s43 1104; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1105; GCN-NEXT: s_cmpk_lg_i32 s0, 0x76 1106; GCN-NEXT: v_cndmask_b32_e32 
v16, 1, v16, vcc 1107; GCN-NEXT: v_mov_b32_e32 v17, s42 1108; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1109; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1110; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1111; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1112; GCN-NEXT: s_cmpk_lg_i32 s0, 0x75 1113; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1114; GCN-NEXT: v_mov_b32_e32 v17, s41 1115; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1116; GCN-NEXT: s_cmpk_lg_i32 s0, 0x74 1117; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1118; GCN-NEXT: v_mov_b32_e32 v18, s40 1119; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1120; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1121; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1122; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1123; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1124; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1125; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1126; GCN-NEXT: s_cmpk_lg_i32 s0, 0x73 1127; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1128; GCN-NEXT: v_mov_b32_e32 v17, s39 1129; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1130; GCN-NEXT: s_cmpk_lg_i32 s0, 0x72 1131; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1132; GCN-NEXT: v_mov_b32_e32 v18, s38 1133; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1134; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1135; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1136; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1137; GCN-NEXT: s_cmpk_lg_i32 s0, 0x71 1138; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1139; GCN-NEXT: v_mov_b32_e32 v18, s37 1140; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1141; GCN-NEXT: s_cmpk_lg_i32 s0, 0x70 1142; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1143; GCN-NEXT: v_mov_b32_e32 v19, s36 1144; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1145; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1146; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1147; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1148; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1149; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1150; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1151; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1152; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 
1153; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1154; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7f 1155; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1156; GCN-NEXT: v_lshrrev_b16_e64 v17, 7, s35 1157; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1158; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7e 1159; GCN-NEXT: v_lshrrev_b16_e64 v18, 6, s35 1160; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1161; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1162; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1163; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1164; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1165; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7d 1166; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1167; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s35 1168; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1169; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7c 1170; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s35 1171; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1172; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1173; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1174; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1175; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1176; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1177; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1178; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1179; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7b 1180; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1181; GCN-NEXT: v_lshrrev_b16_e64 v18, 3, s35 1182; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1183; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7a 1184; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s35 1185; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1186; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1187; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1188; GCN-NEXT: s_cmpk_lg_i32 s0, 0x78 1189; GCN-NEXT: v_mov_b32_e32 v14, s35 1190; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1191; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1192; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1193; GCN-NEXT: s_cmpk_lg_i32 s0, 0x79 1194; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1195; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s35 1196; GCN-NEXT: v_cndmask_b32_e32 v14, 1, v14, vcc 1197; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1198; GCN-NEXT: 
v_cndmask_b32_e32 v19, 1, v19, vcc 1199; GCN-NEXT: v_and_b32_e32 v14, 1, v14 1200; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1201; GCN-NEXT: v_or_b32_e32 v14, v14, v19 1202; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 1203; GCN-NEXT: v_and_b32_e32 v14, 3, v14 1204; GCN-NEXT: v_or_b32_e32 v14, v14, v18 1205; GCN-NEXT: v_lshlrev_b16_e32 v17, 4, v17 1206; GCN-NEXT: v_and_b32_e32 v14, 15, v14 1207; GCN-NEXT: v_or_b32_sdwa v14, v14, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1208; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6f 1209; GCN-NEXT: v_or_b32_sdwa v14, v16, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1210; GCN-NEXT: v_lshrrev_b16_e64 v16, 15, s7 1211; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1212; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6e 1213; GCN-NEXT: v_lshrrev_b16_e64 v17, 14, s7 1214; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1215; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1216; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1217; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1218; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1219; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6d 1220; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1221; GCN-NEXT: v_lshrrev_b16_e64 v17, 13, s7 1222; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1223; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6c 1224; GCN-NEXT: v_lshrrev_b16_e64 v18, 12, s7 1225; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1226; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1227; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1228; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1229; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1230; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1231; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1232; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1233; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6b 1234; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1235; GCN-NEXT: v_lshrrev_b16_e64 v17, 11, s7 1236; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1237; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6a 1238; GCN-NEXT: v_lshrrev_b16_e64 v18, 10, s7 1239; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1240; GCN-NEXT: 
s_cselect_b64 vcc, -1, 0 1241; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1242; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1243; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1244; GCN-NEXT: s_cmpk_lg_i32 s0, 0x69 1245; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1246; GCN-NEXT: v_lshrrev_b16_e64 v18, 9, s7 1247; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1248; GCN-NEXT: s_cmpk_lg_i32 s0, 0x68 1249; GCN-NEXT: v_lshrrev_b16_e64 v19, 8, s7 1250; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1251; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1252; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1253; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1254; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1255; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1256; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1257; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1258; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1259; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 1260; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1261; GCN-NEXT: s_cmpk_lg_i32 s0, 0x67 1262; GCN-NEXT: v_or_b32_sdwa v16, v17, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1263; GCN-NEXT: v_lshrrev_b16_e64 v17, 7, s7 1264; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1265; GCN-NEXT: s_cmpk_lg_i32 s0, 0x66 1266; GCN-NEXT: v_lshrrev_b16_e64 v18, 6, s7 1267; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1268; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1269; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1270; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1271; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1272; GCN-NEXT: s_cmpk_lg_i32 s0, 0x65 1273; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1274; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s7 1275; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1276; GCN-NEXT: s_cmpk_lg_i32 s0, 0x64 1277; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s7 1278; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1279; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1280; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1281; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1282; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1283; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1284; 
GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1285; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1286; GCN-NEXT: s_cmpk_lg_i32 s0, 0x63 1287; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1288; GCN-NEXT: v_lshrrev_b16_e64 v18, 3, s7 1289; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1290; GCN-NEXT: s_cmpk_lg_i32 s0, 0x62 1291; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s7 1292; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1293; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1294; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1295; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1296; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1297; GCN-NEXT: s_cmpk_lg_i32 s0, 0x61 1298; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1299; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s7 1300; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1301; GCN-NEXT: s_cmpk_lg_i32 s0, 0x60 1302; GCN-NEXT: v_mov_b32_e32 v15, s7 1303; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1304; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1305; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1306; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1307; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1308; GCN-NEXT: v_or_b32_e32 v15, v15, v19 1309; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 1310; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1311; GCN-NEXT: v_or_b32_e32 v15, v15, v18 1312; GCN-NEXT: v_lshlrev_b16_e32 v17, 4, v17 1313; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1314; GCN-NEXT: v_or_b32_e32 v15, v15, v17 1315; GCN-NEXT: s_cmpk_lg_i32 s0, 0x57 1316; GCN-NEXT: v_or_b32_sdwa v15, v15, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1317; GCN-NEXT: v_mov_b32_e32 v16, s34 1318; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1319; GCN-NEXT: s_cmpk_lg_i32 s0, 0x56 1320; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1321; GCN-NEXT: v_mov_b32_e32 v17, s33 1322; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1323; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1324; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1325; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1326; GCN-NEXT: s_cmpk_lg_i32 s0, 0x55 1327; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1328; GCN-NEXT: v_mov_b32_e32 
v17, s31 1329; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1330; GCN-NEXT: s_cmpk_lg_i32 s0, 0x54 1331; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1332; GCN-NEXT: v_mov_b32_e32 v18, s30 1333; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1334; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1335; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1336; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1337; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1338; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1339; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1340; GCN-NEXT: s_cmpk_lg_i32 s0, 0x53 1341; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1342; GCN-NEXT: v_mov_b32_e32 v17, s29 1343; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1344; GCN-NEXT: s_cmpk_lg_i32 s0, 0x52 1345; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1346; GCN-NEXT: v_mov_b32_e32 v18, s28 1347; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1348; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1349; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1350; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1351; GCN-NEXT: s_cmpk_lg_i32 s0, 0x51 1352; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1353; GCN-NEXT: v_mov_b32_e32 v18, s27 1354; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1355; GCN-NEXT: s_cmpk_lg_i32 s0, 0x50 1356; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1357; GCN-NEXT: v_mov_b32_e32 v19, s26 1358; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1359; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1360; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1361; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1362; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1363; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1364; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1365; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1366; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 1367; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1368; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5f 1369; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1370; GCN-NEXT: v_lshrrev_b16_e64 v17, 7, s25 1371; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1372; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5e 1373; GCN-NEXT: v_lshrrev_b16_e64 v18, 6, s25 1374; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 
1375; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1376; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1377; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1378; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1379; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5d 1380; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1381; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s25 1382; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1383; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5c 1384; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s25 1385; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1386; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1387; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1388; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1389; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1390; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1391; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1392; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1393; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5b 1394; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1395; GCN-NEXT: v_lshrrev_b16_e64 v18, 3, s25 1396; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1397; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5a 1398; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s25 1399; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1400; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1401; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1402; GCN-NEXT: s_cmpk_lg_i32 s0, 0x58 1403; GCN-NEXT: v_mov_b32_e32 v3, s25 1404; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1405; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1406; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1407; GCN-NEXT: s_cmpk_lg_i32 s0, 0x59 1408; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1409; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s25 1410; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc 1411; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1412; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1413; GCN-NEXT: v_and_b32_e32 v3, 1, v3 1414; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1415; GCN-NEXT: v_or_b32_e32 v3, v3, v19 1416; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 1417; GCN-NEXT: v_and_b32_e32 v3, 3, v3 1418; GCN-NEXT: v_or_b32_e32 v3, v3, v18 1419; GCN-NEXT: v_lshlrev_b16_e32 v17, 4, v17 1420; GCN-NEXT: v_and_b32_e32 
v3, 15, v3 1421; GCN-NEXT: v_or_b32_sdwa v3, v3, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1422; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4f 1423; GCN-NEXT: v_or_b32_sdwa v16, v16, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1424; GCN-NEXT: v_lshrrev_b16_e64 v3, 15, s6 1425; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1426; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4e 1427; GCN-NEXT: v_lshrrev_b16_e64 v17, 14, s6 1428; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc 1429; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1430; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1431; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1432; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1433; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4d 1434; GCN-NEXT: v_or_b32_e32 v3, v17, v3 1435; GCN-NEXT: v_lshrrev_b16_e64 v17, 13, s6 1436; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1437; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4c 1438; GCN-NEXT: v_lshrrev_b16_e64 v18, 12, s6 1439; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1440; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1441; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1442; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1443; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1444; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1445; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 1446; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1447; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4b 1448; GCN-NEXT: v_or_b32_e32 v3, v17, v3 1449; GCN-NEXT: v_lshrrev_b16_e64 v17, 11, s6 1450; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1451; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4a 1452; GCN-NEXT: v_lshrrev_b16_e64 v18, 10, s6 1453; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1454; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1455; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1456; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1457; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1458; GCN-NEXT: s_cmpk_lg_i32 s0, 0x49 1459; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1460; GCN-NEXT: v_lshrrev_b16_e64 v18, 9, s6 1461; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1462; GCN-NEXT: s_cmpk_lg_i32 s0, 0x48 1463; GCN-NEXT: 
v_lshrrev_b16_e64 v19, 8, s6 1464; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1465; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1466; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1467; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1468; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1469; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1470; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1471; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1472; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1473; GCN-NEXT: v_lshlrev_b16_e32 v3, 4, v3 1474; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1475; GCN-NEXT: s_cmpk_lg_i32 s0, 0x47 1476; GCN-NEXT: v_or_b32_sdwa v17, v17, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1477; GCN-NEXT: v_lshrrev_b16_e64 v3, 7, s6 1478; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1479; GCN-NEXT: s_cmpk_lg_i32 s0, 0x46 1480; GCN-NEXT: v_lshrrev_b16_e64 v18, 6, s6 1481; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc 1482; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1483; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1484; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1485; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1486; GCN-NEXT: s_cmpk_lg_i32 s0, 0x45 1487; GCN-NEXT: v_or_b32_e32 v3, v18, v3 1488; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s6 1489; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1490; GCN-NEXT: s_cmpk_lg_i32 s0, 0x44 1491; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s6 1492; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1493; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1494; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1495; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1496; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1497; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1498; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 1499; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1500; GCN-NEXT: s_cmpk_lg_i32 s0, 0x43 1501; GCN-NEXT: v_or_b32_e32 v18, v18, v3 1502; GCN-NEXT: v_lshrrev_b16_e64 v3, 3, s6 1503; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1504; GCN-NEXT: s_cmpk_lg_i32 s0, 0x42 1505; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s6 1506; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc 1507; GCN-NEXT: 
s_cselect_b64 vcc, -1, 0 1508; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1509; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1510; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1511; GCN-NEXT: s_cmpk_lg_i32 s0, 0x41 1512; GCN-NEXT: v_or_b32_e32 v3, v19, v3 1513; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s6 1514; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1515; GCN-NEXT: s_cmp_lg_u32 s0, 64 1516; GCN-NEXT: v_mov_b32_e32 v2, s6 1517; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1518; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1519; GCN-NEXT: v_cndmask_b32_e32 v2, 1, v2, vcc 1520; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1521; GCN-NEXT: v_and_b32_e32 v2, 1, v2 1522; GCN-NEXT: v_or_b32_e32 v2, v2, v19 1523; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 1524; GCN-NEXT: v_and_b32_e32 v2, 3, v2 1525; GCN-NEXT: v_or_b32_e32 v2, v2, v3 1526; GCN-NEXT: v_or_b32_sdwa v3, v15, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1527; GCN-NEXT: v_lshlrev_b16_e32 v14, 4, v18 1528; GCN-NEXT: v_and_b32_e32 v2, 15, v2 1529; GCN-NEXT: s_cmp_lg_u32 s0, 55 1530; GCN-NEXT: v_or_b32_e32 v2, v2, v14 1531; GCN-NEXT: v_mov_b32_e32 v14, s24 1532; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1533; GCN-NEXT: s_cmp_lg_u32 s0, 54 1534; GCN-NEXT: v_cndmask_b32_e32 v14, 1, v14, vcc 1535; GCN-NEXT: v_mov_b32_e32 v15, s23 1536; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1537; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1538; GCN-NEXT: v_lshlrev_b16_e32 v14, 1, v14 1539; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1540; GCN-NEXT: s_cmp_lg_u32 s0, 53 1541; GCN-NEXT: v_or_b32_sdwa v2, v2, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1542; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1543; GCN-NEXT: v_mov_b32_e32 v15, s22 1544; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1545; GCN-NEXT: s_cmp_lg_u32 s0, 52 1546; GCN-NEXT: v_or_b32_sdwa v2, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1547; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1548; GCN-NEXT: v_mov_b32_e32 v16, s21 1549; GCN-NEXT: s_cselect_b64 vcc, -1, 0 
1550; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1551; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1552; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1553; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1554; GCN-NEXT: v_lshlrev_b16_e32 v14, 2, v14 1555; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1556; GCN-NEXT: s_cmp_lg_u32 s0, 51 1557; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1558; GCN-NEXT: v_mov_b32_e32 v15, s20 1559; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1560; GCN-NEXT: s_cmp_lg_u32 s0, 50 1561; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1562; GCN-NEXT: v_mov_b32_e32 v16, s19 1563; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1564; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1565; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1566; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1567; GCN-NEXT: s_cmp_lg_u32 s0, 49 1568; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1569; GCN-NEXT: v_mov_b32_e32 v16, s18 1570; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1571; GCN-NEXT: s_cmp_lg_u32 s0, 48 1572; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1573; GCN-NEXT: v_mov_b32_e32 v17, s17 1574; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1575; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1576; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1577; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1578; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1579; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1580; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1581; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1582; GCN-NEXT: v_lshlrev_b16_e32 v14, 4, v14 1583; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1584; GCN-NEXT: s_cmp_lg_u32 s0, 63 1585; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1586; GCN-NEXT: v_lshrrev_b16_e64 v15, 7, s16 1587; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1588; GCN-NEXT: s_cmp_lg_u32 s0, 62 1589; GCN-NEXT: v_lshrrev_b16_e64 v16, 6, s16 1590; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1591; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1592; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1593; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1594; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1595; GCN-NEXT: s_cmp_lg_u32 s0, 61 1596; GCN-NEXT: 
v_or_b32_e32 v15, v16, v15 1597; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s16 1598; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1599; GCN-NEXT: s_cmp_lg_u32 s0, 60 1600; GCN-NEXT: v_lshrrev_b16_e64 v17, 4, s16 1601; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1602; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1603; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1604; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1605; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1606; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1607; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1608; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1609; GCN-NEXT: s_cmp_lg_u32 s0, 59 1610; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1611; GCN-NEXT: v_lshrrev_b16_e64 v16, 3, s16 1612; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1613; GCN-NEXT: s_cmp_lg_u32 s0, 58 1614; GCN-NEXT: v_lshrrev_b16_e64 v17, 2, s16 1615; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1616; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1617; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1618; GCN-NEXT: s_cmp_lg_u32 s0, 56 1619; GCN-NEXT: v_mov_b32_e32 v13, s16 1620; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1621; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1622; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1623; GCN-NEXT: s_cmp_lg_u32 s0, 57 1624; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1625; GCN-NEXT: v_lshrrev_b16_e64 v17, 1, s16 1626; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1627; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1628; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1629; GCN-NEXT: v_and_b32_e32 v13, 1, v13 1630; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1631; GCN-NEXT: v_or_b32_e32 v13, v13, v17 1632; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1633; GCN-NEXT: v_and_b32_e32 v13, 3, v13 1634; GCN-NEXT: v_or_b32_e32 v13, v13, v16 1635; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15 1636; GCN-NEXT: v_and_b32_e32 v13, 15, v13 1637; GCN-NEXT: v_or_b32_sdwa v13, v13, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1638; GCN-NEXT: s_cmp_lg_u32 s0, 47 1639; GCN-NEXT: v_or_b32_sdwa v14, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD 
src0_sel:BYTE_0 src1_sel:DWORD 1640; GCN-NEXT: v_lshrrev_b16_e64 v13, 15, s5 1641; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1642; GCN-NEXT: s_cmp_lg_u32 s0, 46 1643; GCN-NEXT: v_lshrrev_b16_e64 v15, 14, s5 1644; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1645; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1646; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1647; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1648; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1649; GCN-NEXT: s_cmp_lg_u32 s0, 45 1650; GCN-NEXT: v_or_b32_e32 v13, v15, v13 1651; GCN-NEXT: v_lshrrev_b16_e64 v15, 13, s5 1652; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1653; GCN-NEXT: s_cmp_lg_u32 s0, 44 1654; GCN-NEXT: v_lshrrev_b16_e64 v16, 12, s5 1655; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1656; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1657; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1658; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1659; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1660; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1661; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13 1662; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1663; GCN-NEXT: s_cmp_lg_u32 s0, 43 1664; GCN-NEXT: v_or_b32_e32 v13, v15, v13 1665; GCN-NEXT: v_lshrrev_b16_e64 v15, 11, s5 1666; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1667; GCN-NEXT: s_cmp_lg_u32 s0, 42 1668; GCN-NEXT: v_lshrrev_b16_e64 v16, 10, s5 1669; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1670; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1671; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1672; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1673; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1674; GCN-NEXT: s_cmp_lg_u32 s0, 41 1675; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1676; GCN-NEXT: v_lshrrev_b16_e64 v16, 9, s5 1677; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1678; GCN-NEXT: s_cmp_lg_u32 s0, 40 1679; GCN-NEXT: v_lshrrev_b16_e64 v17, 8, s5 1680; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1681; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1682; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1683; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1684; GCN-NEXT: v_and_b32_e32 v17, 1, v17 
1685; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1686; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1687; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1688; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1689; GCN-NEXT: v_lshlrev_b16_e32 v13, 4, v13 1690; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1691; GCN-NEXT: s_cmp_lg_u32 s0, 39 1692; GCN-NEXT: v_or_b32_sdwa v15, v15, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1693; GCN-NEXT: v_lshrrev_b16_e64 v13, 7, s5 1694; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1695; GCN-NEXT: s_cmp_lg_u32 s0, 38 1696; GCN-NEXT: v_lshrrev_b16_e64 v16, 6, s5 1697; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1698; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1699; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1700; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1701; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1702; GCN-NEXT: s_cmp_lg_u32 s0, 37 1703; GCN-NEXT: v_or_b32_e32 v13, v16, v13 1704; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s5 1705; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1706; GCN-NEXT: s_cmp_lg_u32 s0, 36 1707; GCN-NEXT: v_lshrrev_b16_e64 v17, 4, s5 1708; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1709; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1710; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1711; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1712; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1713; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1714; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13 1715; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1716; GCN-NEXT: s_cmp_lg_u32 s0, 35 1717; GCN-NEXT: v_or_b32_e32 v16, v16, v13 1718; GCN-NEXT: v_lshrrev_b16_e64 v13, 3, s5 1719; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1720; GCN-NEXT: s_cmp_lg_u32 s0, 34 1721; GCN-NEXT: v_lshrrev_b16_e64 v17, 2, s5 1722; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1723; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1724; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1725; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1726; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1727; GCN-NEXT: s_cmp_lg_u32 s0, 33 1728; GCN-NEXT: v_or_b32_e32 v17, v17, v13 1729; GCN-NEXT: 
v_lshrrev_b16_e64 v13, 1, s5 1730; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1731; GCN-NEXT: s_cmp_lg_u32 s0, 32 1732; GCN-NEXT: v_mov_b32_e32 v1, s5 1733; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1734; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1735; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 1736; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1737; GCN-NEXT: v_and_b32_e32 v1, 1, v1 1738; GCN-NEXT: v_or_b32_e32 v1, v1, v13 1739; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1740; GCN-NEXT: v_and_b32_e32 v1, 3, v1 1741; GCN-NEXT: v_or_b32_e32 v1, v1, v17 1742; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 1743; GCN-NEXT: v_and_b32_e32 v1, 15, v1 1744; GCN-NEXT: v_or_b32_e32 v1, v1, v16 1745; GCN-NEXT: v_or_b32_sdwa v1, v1, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1746; GCN-NEXT: s_cmp_lg_u32 s0, 23 1747; GCN-NEXT: v_or_b32_sdwa v1, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1748; GCN-NEXT: v_mov_b32_e32 v14, s15 1749; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1750; GCN-NEXT: s_cmp_lg_u32 s0, 22 1751; GCN-NEXT: v_cndmask_b32_e32 v14, 1, v14, vcc 1752; GCN-NEXT: v_mov_b32_e32 v15, s14 1753; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1754; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1755; GCN-NEXT: v_lshlrev_b16_e32 v14, 1, v14 1756; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1757; GCN-NEXT: s_cmp_lg_u32 s0, 21 1758; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1759; GCN-NEXT: v_mov_b32_e32 v15, s13 1760; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1761; GCN-NEXT: s_cmp_lg_u32 s0, 20 1762; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1763; GCN-NEXT: v_mov_b32_e32 v16, s12 1764; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1765; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1766; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1767; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1768; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1769; GCN-NEXT: v_lshlrev_b16_e32 v14, 2, v14 1770; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1771; GCN-NEXT: s_cmp_lg_u32 s0, 19 1772; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1773; GCN-NEXT: 
v_mov_b32_e32 v15, s11 1774; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1775; GCN-NEXT: s_cmp_lg_u32 s0, 18 1776; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1777; GCN-NEXT: v_mov_b32_e32 v16, s10 1778; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1779; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1780; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1781; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1782; GCN-NEXT: s_cmp_lg_u32 s0, 17 1783; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1784; GCN-NEXT: v_mov_b32_e32 v16, s9 1785; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1786; GCN-NEXT: s_cmp_lg_u32 s0, 16 1787; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1788; GCN-NEXT: v_mov_b32_e32 v18, s8 1789; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1790; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1791; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1792; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1793; GCN-NEXT: v_or_b32_e32 v16, v18, v16 1794; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1795; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1796; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1797; GCN-NEXT: v_lshlrev_b16_e32 v14, 4, v14 1798; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1799; GCN-NEXT: s_cmp_lg_u32 s0, 31 1800; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1801; GCN-NEXT: v_lshrrev_b16_e64 v15, 7, s1 1802; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1803; GCN-NEXT: s_cmp_lg_u32 s0, 30 1804; GCN-NEXT: v_lshrrev_b16_e64 v16, 6, s1 1805; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1806; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1807; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1808; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1809; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1810; GCN-NEXT: s_cmp_lg_u32 s0, 29 1811; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1812; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s1 1813; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1814; GCN-NEXT: s_cmp_lg_u32 s0, 28 1815; GCN-NEXT: v_lshrrev_b16_e64 v18, 4, s1 1816; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1817; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1818; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1819; GCN-NEXT: v_lshlrev_b16_e32 
v16, 1, v16 1820; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1821; GCN-NEXT: v_or_b32_e32 v16, v18, v16 1822; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1823; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1824; GCN-NEXT: s_cmp_lg_u32 s0, 27 1825; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1826; GCN-NEXT: v_lshrrev_b16_e64 v16, 3, s1 1827; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1828; GCN-NEXT: s_cmp_lg_u32 s0, 26 1829; GCN-NEXT: v_lshrrev_b16_e64 v18, 2, s1 1830; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1831; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1832; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1833; GCN-NEXT: s_cmp_lg_u32 s0, 24 1834; GCN-NEXT: v_mov_b32_e32 v17, s1 1835; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1836; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1837; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1838; GCN-NEXT: s_cmp_lg_u32 s0, 25 1839; GCN-NEXT: v_or_b32_e32 v16, v18, v16 1840; GCN-NEXT: v_lshrrev_b16_e64 v18, 1, s1 1841; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1842; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1843; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1844; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1845; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1846; GCN-NEXT: v_or_b32_e32 v17, v17, v18 1847; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1848; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1849; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1850; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15 1851; GCN-NEXT: v_and_b32_e32 v16, 15, v16 1852; GCN-NEXT: v_or_b32_sdwa v15, v16, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1853; GCN-NEXT: s_cmp_lg_u32 s0, 15 1854; GCN-NEXT: v_or_b32_sdwa v14, v14, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1855; GCN-NEXT: v_lshrrev_b16_e64 v15, 15, s4 1856; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1857; GCN-NEXT: s_cmp_lg_u32 s0, 14 1858; GCN-NEXT: v_lshrrev_b16_e64 v16, 14, s4 1859; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1860; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1861; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1862; GCN-NEXT: v_lshlrev_b16_e32 
v15, 1, v15 1863; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1864; GCN-NEXT: s_cmp_lg_u32 s0, 13 1865; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1866; GCN-NEXT: v_lshrrev_b16_e64 v16, 13, s4 1867; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1868; GCN-NEXT: s_cmp_lg_u32 s0, 12 1869; GCN-NEXT: v_lshrrev_b16_e64 v17, 12, s4 1870; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1871; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1872; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1873; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1874; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1875; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1876; GCN-NEXT: s_cmp_lg_u32 s0, 11 1877; GCN-NEXT: v_lshrrev_b16_e64 v18, 11, s4 1878; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1879; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1880; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1881; GCN-NEXT: s_cmp_lg_u32 s0, 10 1882; GCN-NEXT: v_lshrrev_b16_e64 v13, 10, s4 1883; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1884; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v18, vcc 1885; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1886; GCN-NEXT: s_cmp_lg_u32 s0, 9 1887; GCN-NEXT: v_lshrrev_b16_e64 v12, 9, s4 1888; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1889; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1890; GCN-NEXT: s_cmp_lg_u32 s0, 8 1891; GCN-NEXT: v_lshrrev_b16_e64 v11, 8, s4 1892; GCN-NEXT: v_cndmask_b32_e32 v12, 1, v12, vcc 1893; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1894; GCN-NEXT: s_cmp_lg_u32 s0, 7 1895; GCN-NEXT: v_lshrrev_b16_e64 v10, 7, s4 1896; GCN-NEXT: v_cndmask_b32_e32 v11, 1, v11, vcc 1897; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1898; GCN-NEXT: s_cmp_lg_u32 s0, 6 1899; GCN-NEXT: v_lshrrev_b16_e64 v9, 6, s4 1900; GCN-NEXT: v_cndmask_b32_e32 v10, 1, v10, vcc 1901; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1902; GCN-NEXT: s_cmp_lg_u32 s0, 5 1903; GCN-NEXT: v_lshrrev_b16_e64 v8, 5, s4 1904; GCN-NEXT: v_cndmask_b32_e32 v9, 1, v9, vcc 1905; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1906; GCN-NEXT: s_cmp_lg_u32 s0, 4 1907; GCN-NEXT: v_lshrrev_b16_e64 v7, 4, s4 1908; GCN-NEXT: v_cndmask_b32_e32 v8, 1, v8, vcc 1909; 
GCN-NEXT: s_cselect_b64 vcc, -1, 0 1910; GCN-NEXT: s_cmp_lg_u32 s0, 3 1911; GCN-NEXT: v_lshrrev_b16_e64 v6, 3, s4 1912; GCN-NEXT: v_cndmask_b32_e32 v7, 1, v7, vcc 1913; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1914; GCN-NEXT: s_cmp_lg_u32 s0, 2 1915; GCN-NEXT: v_lshrrev_b16_e64 v5, 2, s4 1916; GCN-NEXT: v_cndmask_b32_e32 v6, 1, v6, vcc 1917; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1918; GCN-NEXT: s_cmp_lg_u32 s0, 1 1919; GCN-NEXT: v_lshrrev_b16_e64 v4, 1, s4 1920; GCN-NEXT: v_cndmask_b32_e32 v5, 1, v5, vcc 1921; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1922; GCN-NEXT: s_cmp_lg_u32 s0, 0 1923; GCN-NEXT: v_mov_b32_e32 v0, s4 1924; GCN-NEXT: v_cndmask_b32_e32 v4, 1, v4, vcc 1925; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1926; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc 1927; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1928; GCN-NEXT: v_and_b32_e32 v13, 1, v13 1929; GCN-NEXT: v_lshlrev_b16_e32 v12, 1, v12 1930; GCN-NEXT: v_and_b32_e32 v11, 1, v11 1931; GCN-NEXT: v_lshlrev_b16_e32 v10, 1, v10 1932; GCN-NEXT: v_and_b32_e32 v9, 1, v9 1933; GCN-NEXT: v_lshlrev_b16_e32 v8, 1, v8 1934; GCN-NEXT: v_and_b32_e32 v7, 1, v7 1935; GCN-NEXT: v_lshlrev_b16_e32 v6, 1, v6 1936; GCN-NEXT: v_and_b32_e32 v5, 1, v5 1937; GCN-NEXT: v_lshlrev_b16_e32 v4, 1, v4 1938; GCN-NEXT: v_and_b32_e32 v0, 1, v0 1939; GCN-NEXT: v_or_b32_e32 v13, v13, v16 1940; GCN-NEXT: v_or_b32_e32 v11, v11, v12 1941; GCN-NEXT: v_or_b32_e32 v9, v9, v10 1942; GCN-NEXT: v_or_b32_e32 v7, v7, v8 1943; GCN-NEXT: v_or_b32_e32 v5, v5, v6 1944; GCN-NEXT: v_or_b32_e32 v0, v0, v4 1945; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13 1946; GCN-NEXT: v_and_b32_e32 v11, 3, v11 1947; GCN-NEXT: v_lshlrev_b16_e32 v9, 2, v9 1948; GCN-NEXT: v_and_b32_e32 v7, 3, v7 1949; GCN-NEXT: v_lshlrev_b16_e32 v5, 2, v5 1950; GCN-NEXT: v_and_b32_e32 v0, 3, v0 1951; GCN-NEXT: v_or_b32_e32 v11, v11, v13 1952; GCN-NEXT: v_or_b32_e32 v7, v7, v9 1953; GCN-NEXT: v_or_b32_e32 v0, v0, v5 1954; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15 1955; GCN-NEXT: v_and_b32_e32 v11, 15, v11 1956; GCN-NEXT: 
v_lshlrev_b16_e32 v7, 4, v7 1957; GCN-NEXT: v_and_b32_e32 v0, 15, v0 1958; GCN-NEXT: v_or_b32_sdwa v11, v11, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1959; GCN-NEXT: v_or_b32_e32 v0, v0, v7 1960; GCN-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1961; GCN-NEXT: v_mov_b32_e32 v5, s3 1962; GCN-NEXT: v_or_b32_sdwa v0, v0, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1963; GCN-NEXT: v_mov_b32_e32 v4, s2 1964; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1965; GCN-NEXT: s_endpgm 1966entry: 1967 %v = insertelement <128 x i1> %vec, i1 1, i32 %sel 1968 store <128 x i1> %v, <128 x i1> addrspace(1)* %out 1969 ret void 1970} 1971

; Inserts the constant 1.0 at a run-time index into a <32 x float> that is
; passed and returned by value under the amdgpu_ps calling convention.
; The assertions expect the index (v32) to be compared against every lane
; number 0..31, each compare feeding one v_cndmask that keeps the incoming
; element when the index differs and writes the inline constant 1.0 otherwise.
; The assertions below are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_ps <32 x float> @float32_inselt_vec(<32 x float> %vec, i32 %sel) {
; GCN-LABEL: float32_inselt_vec:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 2, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], 3, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 4, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 5, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[8:9], 6, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[10:11], 7, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[12:13], 8, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[14:15], 9, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[16:17], 10, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[18:19], 11, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[20:21], 12, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[22:23], 13, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[24:25], 14, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[26:27], 15, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[28:29], 16, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[30:31], 17, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[34:35], 18, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[36:37], 19, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[38:39], 20, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[40:41], 21, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[42:43], 22, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[44:45], 23, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[46:47], 24, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[48:49], 25, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[50:51], 26, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[52:53], 27, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[54:55], 28, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[56:57], 29, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[58:59], 30, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[60:61], 31, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[62:63], 0, v32
; GCN-NEXT: v_cndmask_b32_e64 v0, 1.0, v0, s[62:63]
; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
; GCN-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1]
; GCN-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[2:3]
; GCN-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[4:5]
; GCN-NEXT: v_cndmask_b32_e64 v5, 1.0, v5, s[6:7]
; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, v6, s[8:9]
; GCN-NEXT: v_cndmask_b32_e64 v7, 1.0, v7, s[10:11]
; GCN-NEXT: v_cndmask_b32_e64 v8, 1.0, v8, s[12:13]
; GCN-NEXT: v_cndmask_b32_e64 v9, 1.0, v9, s[14:15]
; GCN-NEXT: v_cndmask_b32_e64 v10, 1.0, v10, s[16:17]
; GCN-NEXT: v_cndmask_b32_e64 v11, 1.0, v11, s[18:19]
; GCN-NEXT: v_cndmask_b32_e64 v12, 1.0, v12, s[20:21]
; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, v13, s[22:23]
; GCN-NEXT: v_cndmask_b32_e64 v14, 1.0, v14, s[24:25]
; GCN-NEXT: v_cndmask_b32_e64 v15, 1.0, v15, s[26:27]
; GCN-NEXT: v_cndmask_b32_e64 v16, 1.0, v16, s[28:29]
; GCN-NEXT: v_cndmask_b32_e64 v17, 1.0, v17, s[30:31]
; GCN-NEXT: v_cndmask_b32_e64 v18, 1.0, v18, s[34:35]
; GCN-NEXT: v_cndmask_b32_e64 v19, 1.0, v19, s[36:37]
; GCN-NEXT: v_cndmask_b32_e64 v20, 1.0, v20, s[38:39]
; GCN-NEXT: v_cndmask_b32_e64 v21, 1.0, v21, s[40:41]
; GCN-NEXT: v_cndmask_b32_e64 v22, 1.0, v22, s[42:43]
; GCN-NEXT: v_cndmask_b32_e64 v23, 1.0, v23, s[44:45]
; GCN-NEXT: v_cndmask_b32_e64 v24, 1.0, v24, s[46:47]
; GCN-NEXT: v_cndmask_b32_e64 v25, 1.0, v25, s[48:49]
; GCN-NEXT: v_cndmask_b32_e64 v26, 1.0, v26, s[50:51]
; GCN-NEXT: v_cndmask_b32_e64 v27, 1.0, v27, s[52:53]
; GCN-NEXT: v_cndmask_b32_e64 v28, 1.0, v28, s[54:55]
; GCN-NEXT: v_cndmask_b32_e64 v29, 1.0, v29, s[56:57]
; GCN-NEXT: v_cndmask_b32_e64 v30, 1.0, v30, s[58:59]
; GCN-NEXT: v_cndmask_b32_e64 v31, 1.0, v31, s[60:61]
; GCN-NEXT: ; return to shader part epilog
entry:
  %inserted = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel
  ret <32 x float> %inserted
}

; Inserts the constant 1.0 at a run-time index into an <8 x double> passed and
; returned by value from an ordinary (non-kernel) function. In the assertions
; each 64-bit element occupies a register pair, so every lane gets one compare
; of the index (v16) and two selects. 1.0 as an IEEE-754 double is
; 0x3FF0000000000000, hence the low dword selects 0 and the high dword selects
; 0x3ff00000, which is materialized once in v17.
; The assertions below are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) {
; GCN-LABEL: double8_inselt_vec:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
; GCN-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
; GCN-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
; GCN-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
; GCN-NEXT: v_cndmask_b32_e64 v12, v12, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
; GCN-NEXT: v_cndmask_b32_e64 v14, v14, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %inserted = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel
  ret <8 x double> %inserted
}
