1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s 3 4define amdgpu_kernel void @float4_inselt(<4 x float> addrspace(1)* %out, <4 x float> %vec, i32 %sel) { 5; GCN-LABEL: float4_inselt: 6; GCN: ; %bb.0: ; %entry 7; GCN-NEXT: s_load_dword s2, s[0:1], 0x44 8; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 9; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 10; GCN-NEXT: s_waitcnt lgkmcnt(0) 11; GCN-NEXT: s_cmp_lg_u32 s2, 3 12; GCN-NEXT: v_mov_b32_e32 v0, s7 13; GCN-NEXT: s_cselect_b64 vcc, -1, 0 14; GCN-NEXT: s_cmp_lg_u32 s2, 2 15; GCN-NEXT: v_cndmask_b32_e32 v3, 1.0, v0, vcc 16; GCN-NEXT: v_mov_b32_e32 v0, s6 17; GCN-NEXT: s_cselect_b64 vcc, -1, 0 18; GCN-NEXT: s_cmp_lg_u32 s2, 1 19; GCN-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc 20; GCN-NEXT: v_mov_b32_e32 v0, s5 21; GCN-NEXT: s_cselect_b64 vcc, -1, 0 22; GCN-NEXT: s_cmp_lg_u32 s2, 0 23; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v0, vcc 24; GCN-NEXT: v_mov_b32_e32 v0, s4 25; GCN-NEXT: s_cselect_b64 vcc, -1, 0 26; GCN-NEXT: v_mov_b32_e32 v5, s1 27; GCN-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc 28; GCN-NEXT: v_mov_b32_e32 v4, s0 29; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 30; GCN-NEXT: s_endpgm 31entry: 32 %v = insertelement <4 x float> %vec, float 1.000000e+00, i32 %sel 33 store <4 x float> %v, <4 x float> addrspace(1)* %out 34 ret void 35} 36 37define amdgpu_kernel void @float4_inselt_undef(<4 x float> addrspace(1)* %out, i32 %sel) { 38; GCN-LABEL: float4_inselt_undef: 39; GCN: ; %bb.0: ; %entry 40; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 41; GCN-NEXT: v_mov_b32_e32 v0, 1.0 42; GCN-NEXT: v_mov_b32_e32 v1, v0 43; GCN-NEXT: v_mov_b32_e32 v2, v0 44; GCN-NEXT: v_mov_b32_e32 v3, v0 45; GCN-NEXT: s_waitcnt lgkmcnt(0) 46; GCN-NEXT: v_mov_b32_e32 v5, s1 47; GCN-NEXT: v_mov_b32_e32 v4, s0 48; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 49; GCN-NEXT: s_endpgm 50entry: 51 %v = 
insertelement <4 x float> undef, float 1.000000e+00, i32 %sel 52 store <4 x float> %v, <4 x float> addrspace(1)* %out 53 ret void 54} 55 56define amdgpu_kernel void @int4_inselt(<4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %sel) { 57; GCN-LABEL: int4_inselt: 58; GCN: ; %bb.0: ; %entry 59; GCN-NEXT: s_load_dword s2, s[0:1], 0x44 60; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 61; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 62; GCN-NEXT: s_waitcnt lgkmcnt(0) 63; GCN-NEXT: s_cmp_lg_u32 s2, 3 64; GCN-NEXT: s_cselect_b32 s3, s7, 1 65; GCN-NEXT: s_cmp_lg_u32 s2, 2 66; GCN-NEXT: s_cselect_b32 s6, s6, 1 67; GCN-NEXT: s_cmp_lg_u32 s2, 1 68; GCN-NEXT: s_cselect_b32 s5, s5, 1 69; GCN-NEXT: s_cmp_lg_u32 s2, 0 70; GCN-NEXT: s_cselect_b32 s2, s4, 1 71; GCN-NEXT: v_mov_b32_e32 v5, s1 72; GCN-NEXT: v_mov_b32_e32 v0, s2 73; GCN-NEXT: v_mov_b32_e32 v1, s5 74; GCN-NEXT: v_mov_b32_e32 v2, s6 75; GCN-NEXT: v_mov_b32_e32 v3, s3 76; GCN-NEXT: v_mov_b32_e32 v4, s0 77; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 78; GCN-NEXT: s_endpgm 79entry: 80 %v = insertelement <4 x i32> %vec, i32 1, i32 %sel 81 store <4 x i32> %v, <4 x i32> addrspace(1)* %out 82 ret void 83} 84 85define amdgpu_kernel void @float2_inselt(<2 x float> addrspace(1)* %out, <2 x float> %vec, i32 %sel) { 86; GCN-LABEL: float2_inselt: 87; GCN: ; %bb.0: ; %entry 88; GCN-NEXT: s_load_dword s4, s[0:1], 0x34 89; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c 90; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 91; GCN-NEXT: s_waitcnt lgkmcnt(0) 92; GCN-NEXT: s_cmp_lg_u32 s4, 1 93; GCN-NEXT: v_mov_b32_e32 v0, s3 94; GCN-NEXT: s_cselect_b64 vcc, -1, 0 95; GCN-NEXT: s_cmp_lg_u32 s4, 0 96; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v0, vcc 97; GCN-NEXT: v_mov_b32_e32 v0, s2 98; GCN-NEXT: s_cselect_b64 vcc, -1, 0 99; GCN-NEXT: v_mov_b32_e32 v3, s1 100; GCN-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc 101; GCN-NEXT: v_mov_b32_e32 v2, s0 102; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 103; GCN-NEXT: s_endpgm 104entry: 105 %v = insertelement 
<2 x float> %vec, float 1.000000e+00, i32 %sel 106 store <2 x float> %v, <2 x float> addrspace(1)* %out 107 ret void 108} 109 110define amdgpu_kernel void @float8_inselt(<8 x float> addrspace(1)* %out, <8 x float> %vec, i32 %sel) { 111; GCN-LABEL: float8_inselt: 112; GCN: ; %bb.0: ; %entry 113; GCN-NEXT: s_load_dword s2, s[0:1], 0x64 114; GCN-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x44 115; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 116; GCN-NEXT: s_waitcnt lgkmcnt(0) 117; GCN-NEXT: s_cmp_lg_u32 s2, 3 118; GCN-NEXT: v_mov_b32_e32 v0, s7 119; GCN-NEXT: s_cselect_b64 vcc, -1, 0 120; GCN-NEXT: s_cmp_lg_u32 s2, 2 121; GCN-NEXT: v_cndmask_b32_e32 v3, 1.0, v0, vcc 122; GCN-NEXT: v_mov_b32_e32 v0, s6 123; GCN-NEXT: s_cselect_b64 vcc, -1, 0 124; GCN-NEXT: s_cmp_lg_u32 s2, 1 125; GCN-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc 126; GCN-NEXT: v_mov_b32_e32 v0, s5 127; GCN-NEXT: s_cselect_b64 vcc, -1, 0 128; GCN-NEXT: s_cmp_lg_u32 s2, 0 129; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v0, vcc 130; GCN-NEXT: v_mov_b32_e32 v0, s4 131; GCN-NEXT: s_cselect_b64 vcc, -1, 0 132; GCN-NEXT: s_cmp_lg_u32 s2, 7 133; GCN-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc 134; GCN-NEXT: v_mov_b32_e32 v4, s11 135; GCN-NEXT: s_cselect_b64 vcc, -1, 0 136; GCN-NEXT: s_cmp_lg_u32 s2, 6 137; GCN-NEXT: v_cndmask_b32_e32 v7, 1.0, v4, vcc 138; GCN-NEXT: v_mov_b32_e32 v4, s10 139; GCN-NEXT: s_cselect_b64 vcc, -1, 0 140; GCN-NEXT: s_cmp_lg_u32 s2, 5 141; GCN-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc 142; GCN-NEXT: v_mov_b32_e32 v4, s9 143; GCN-NEXT: s_cselect_b64 vcc, -1, 0 144; GCN-NEXT: s_cmp_lg_u32 s2, 4 145; GCN-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc 146; GCN-NEXT: s_cselect_b64 vcc, -1, 0 147; GCN-NEXT: s_add_u32 s2, s0, 16 148; GCN-NEXT: s_addc_u32 s3, s1, 0 149; GCN-NEXT: v_mov_b32_e32 v4, s8 150; GCN-NEXT: v_mov_b32_e32 v9, s3 151; GCN-NEXT: v_cndmask_b32_e32 v4, 1.0, v4, vcc 152; GCN-NEXT: v_mov_b32_e32 v8, s2 153; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 154; GCN-NEXT: s_nop 0 155; GCN-NEXT: 
v_mov_b32_e32 v5, s1 156; GCN-NEXT: v_mov_b32_e32 v4, s0 157; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 158; GCN-NEXT: s_endpgm 159entry: 160 %v = insertelement <8 x float> %vec, float 1.000000e+00, i32 %sel 161 store <8 x float> %v, <8 x float> addrspace(1)* %out 162 ret void 163} 164 165define amdgpu_kernel void @float16_inselt(<16 x float> addrspace(1)* %out, <16 x float> %vec, i32 %sel) { 166; GCN-LABEL: float16_inselt: 167; GCN: ; %bb.0: ; %entry 168; GCN-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 169; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 170; GCN-NEXT: s_load_dword s20, s[0:1], 0xa4 171; GCN-NEXT: s_waitcnt lgkmcnt(0) 172; GCN-NEXT: v_mov_b32_e32 v0, s4 173; GCN-NEXT: s_add_u32 s0, s2, 48 174; GCN-NEXT: s_addc_u32 s1, s3, 0 175; GCN-NEXT: v_mov_b32_e32 v17, s1 176; GCN-NEXT: v_mov_b32_e32 v1, s5 177; GCN-NEXT: v_mov_b32_e32 v2, s6 178; GCN-NEXT: v_mov_b32_e32 v3, s7 179; GCN-NEXT: v_mov_b32_e32 v4, s8 180; GCN-NEXT: v_mov_b32_e32 v5, s9 181; GCN-NEXT: v_mov_b32_e32 v6, s10 182; GCN-NEXT: v_mov_b32_e32 v7, s11 183; GCN-NEXT: v_mov_b32_e32 v8, s12 184; GCN-NEXT: v_mov_b32_e32 v9, s13 185; GCN-NEXT: v_mov_b32_e32 v10, s14 186; GCN-NEXT: v_mov_b32_e32 v11, s15 187; GCN-NEXT: v_mov_b32_e32 v12, s16 188; GCN-NEXT: v_mov_b32_e32 v13, s17 189; GCN-NEXT: v_mov_b32_e32 v14, s18 190; GCN-NEXT: v_mov_b32_e32 v15, s19 191; GCN-NEXT: s_mov_b32 m0, s20 192; GCN-NEXT: v_mov_b32_e32 v16, s0 193; GCN-NEXT: s_add_u32 s0, s2, 32 194; GCN-NEXT: v_movreld_b32_e32 v0, 1.0 195; GCN-NEXT: s_addc_u32 s1, s3, 0 196; GCN-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 197; GCN-NEXT: s_nop 0 198; GCN-NEXT: v_mov_b32_e32 v13, s1 199; GCN-NEXT: v_mov_b32_e32 v12, s0 200; GCN-NEXT: s_add_u32 s0, s2, 16 201; GCN-NEXT: s_addc_u32 s1, s3, 0 202; GCN-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 203; GCN-NEXT: s_nop 0 204; GCN-NEXT: v_mov_b32_e32 v9, s1 205; GCN-NEXT: v_mov_b32_e32 v8, s0 206; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 207; GCN-NEXT: s_nop 0 208; GCN-NEXT: v_mov_b32_e32 
v5, s3 209; GCN-NEXT: v_mov_b32_e32 v4, s2 210; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 211; GCN-NEXT: s_endpgm 212entry: 213 %v = insertelement <16 x float> %vec, float 1.000000e+00, i32 %sel 214 store <16 x float> %v, <16 x float> addrspace(1)* %out 215 ret void 216} 217 218define amdgpu_kernel void @float32_inselt(<32 x float> addrspace(1)* %out, <32 x float> %vec, i32 %sel) { 219; GCN-LABEL: float32_inselt: 220; GCN: ; %bb.0: ; %entry 221; GCN-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0xa4 222; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 223; GCN-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0xe4 224; GCN-NEXT: s_load_dword s0, s[0:1], 0x124 225; GCN-NEXT: s_waitcnt lgkmcnt(0) 226; GCN-NEXT: v_mov_b32_e32 v0, s36 227; GCN-NEXT: v_mov_b32_e32 v1, s37 228; GCN-NEXT: v_mov_b32_e32 v2, s38 229; GCN-NEXT: s_mov_b32 m0, s0 230; GCN-NEXT: s_add_u32 s0, s2, 0x70 231; GCN-NEXT: s_addc_u32 s1, s3, 0 232; GCN-NEXT: v_mov_b32_e32 v33, s1 233; GCN-NEXT: v_mov_b32_e32 v3, s39 234; GCN-NEXT: v_mov_b32_e32 v4, s40 235; GCN-NEXT: v_mov_b32_e32 v5, s41 236; GCN-NEXT: v_mov_b32_e32 v6, s42 237; GCN-NEXT: v_mov_b32_e32 v7, s43 238; GCN-NEXT: v_mov_b32_e32 v8, s44 239; GCN-NEXT: v_mov_b32_e32 v9, s45 240; GCN-NEXT: v_mov_b32_e32 v10, s46 241; GCN-NEXT: v_mov_b32_e32 v11, s47 242; GCN-NEXT: v_mov_b32_e32 v12, s48 243; GCN-NEXT: v_mov_b32_e32 v13, s49 244; GCN-NEXT: v_mov_b32_e32 v14, s50 245; GCN-NEXT: v_mov_b32_e32 v15, s51 246; GCN-NEXT: v_mov_b32_e32 v16, s4 247; GCN-NEXT: v_mov_b32_e32 v17, s5 248; GCN-NEXT: v_mov_b32_e32 v18, s6 249; GCN-NEXT: v_mov_b32_e32 v19, s7 250; GCN-NEXT: v_mov_b32_e32 v20, s8 251; GCN-NEXT: v_mov_b32_e32 v21, s9 252; GCN-NEXT: v_mov_b32_e32 v22, s10 253; GCN-NEXT: v_mov_b32_e32 v23, s11 254; GCN-NEXT: v_mov_b32_e32 v24, s12 255; GCN-NEXT: v_mov_b32_e32 v25, s13 256; GCN-NEXT: v_mov_b32_e32 v26, s14 257; GCN-NEXT: v_mov_b32_e32 v27, s15 258; GCN-NEXT: v_mov_b32_e32 v28, s16 259; GCN-NEXT: v_mov_b32_e32 v29, s17 260; GCN-NEXT: v_mov_b32_e32 v30, s18 261; 
GCN-NEXT: v_mov_b32_e32 v31, s19 262; GCN-NEXT: v_mov_b32_e32 v32, s0 263; GCN-NEXT: s_add_u32 s0, s2, 0x60 264; GCN-NEXT: v_movreld_b32_e32 v0, 1.0 265; GCN-NEXT: s_addc_u32 s1, s3, 0 266; GCN-NEXT: flat_store_dwordx4 v[32:33], v[28:31] 267; GCN-NEXT: s_nop 0 268; GCN-NEXT: v_mov_b32_e32 v29, s1 269; GCN-NEXT: v_mov_b32_e32 v28, s0 270; GCN-NEXT: s_add_u32 s0, s2, 0x50 271; GCN-NEXT: s_addc_u32 s1, s3, 0 272; GCN-NEXT: flat_store_dwordx4 v[28:29], v[24:27] 273; GCN-NEXT: s_nop 0 274; GCN-NEXT: v_mov_b32_e32 v25, s1 275; GCN-NEXT: v_mov_b32_e32 v24, s0 276; GCN-NEXT: s_add_u32 s0, s2, 64 277; GCN-NEXT: s_addc_u32 s1, s3, 0 278; GCN-NEXT: flat_store_dwordx4 v[24:25], v[20:23] 279; GCN-NEXT: s_nop 0 280; GCN-NEXT: v_mov_b32_e32 v21, s1 281; GCN-NEXT: v_mov_b32_e32 v20, s0 282; GCN-NEXT: s_add_u32 s0, s2, 48 283; GCN-NEXT: s_addc_u32 s1, s3, 0 284; GCN-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 285; GCN-NEXT: s_nop 0 286; GCN-NEXT: v_mov_b32_e32 v17, s1 287; GCN-NEXT: v_mov_b32_e32 v16, s0 288; GCN-NEXT: s_add_u32 s0, s2, 32 289; GCN-NEXT: s_addc_u32 s1, s3, 0 290; GCN-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 291; GCN-NEXT: s_nop 0 292; GCN-NEXT: v_mov_b32_e32 v13, s1 293; GCN-NEXT: v_mov_b32_e32 v12, s0 294; GCN-NEXT: s_add_u32 s0, s2, 16 295; GCN-NEXT: s_addc_u32 s1, s3, 0 296; GCN-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 297; GCN-NEXT: s_nop 0 298; GCN-NEXT: v_mov_b32_e32 v9, s1 299; GCN-NEXT: v_mov_b32_e32 v8, s0 300; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 301; GCN-NEXT: s_nop 0 302; GCN-NEXT: v_mov_b32_e32 v5, s3 303; GCN-NEXT: v_mov_b32_e32 v4, s2 304; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 305; GCN-NEXT: s_endpgm 306entry: 307 %v = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel 308 store <32 x float> %v, <32 x float> addrspace(1)* %out 309 ret void 310} 311 312define amdgpu_kernel void @half4_inselt(<4 x half> addrspace(1)* %out, <4 x half> %vec, i32 %sel) { 313; GCN-LABEL: half4_inselt: 314; GCN: ; %bb.0: ; %entry 315; GCN-NEXT: 
s_load_dword s6, s[0:1], 0x34 316; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 317; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 318; GCN-NEXT: s_mov_b64 s[4:5], 0xffff 319; GCN-NEXT: s_waitcnt lgkmcnt(0) 320; GCN-NEXT: s_lshl_b32 s6, s6, 4 321; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], s6 322; GCN-NEXT: s_mov_b32 s6, 0x3c003c00 323; GCN-NEXT: s_mov_b32 s7, s6 324; GCN-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 325; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 326; GCN-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 327; GCN-NEXT: v_mov_b32_e32 v0, s2 328; GCN-NEXT: v_mov_b32_e32 v3, s1 329; GCN-NEXT: v_mov_b32_e32 v1, s3 330; GCN-NEXT: v_mov_b32_e32 v2, s0 331; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 332; GCN-NEXT: s_endpgm 333entry: 334 %v = insertelement <4 x half> %vec, half 1.000000e+00, i32 %sel 335 store <4 x half> %v, <4 x half> addrspace(1)* %out 336 ret void 337} 338 339define amdgpu_kernel void @half2_inselt(<2 x half> addrspace(1)* %out, <2 x half> %vec, i32 %sel) { 340; GCN-LABEL: half2_inselt: 341; GCN: ; %bb.0: ; %entry 342; GCN-NEXT: s_load_dword s2, s[0:1], 0x30 343; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c 344; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 345; GCN-NEXT: s_waitcnt lgkmcnt(0) 346; GCN-NEXT: s_lshl_b32 s2, s2, 4 347; GCN-NEXT: s_lshl_b32 s2, 0xffff, s2 348; GCN-NEXT: s_andn2_b32 s3, s3, s2 349; GCN-NEXT: s_and_b32 s2, s2, 0x3c003c00 350; GCN-NEXT: s_or_b32 s2, s2, s3 351; GCN-NEXT: v_mov_b32_e32 v0, s0 352; GCN-NEXT: v_mov_b32_e32 v1, s1 353; GCN-NEXT: v_mov_b32_e32 v2, s2 354; GCN-NEXT: flat_store_dword v[0:1], v2 355; GCN-NEXT: s_endpgm 356entry: 357 %v = insertelement <2 x half> %vec, half 1.000000e+00, i32 %sel 358 store <2 x half> %v, <2 x half> addrspace(1)* %out 359 ret void 360} 361 362define amdgpu_kernel void @half8_inselt(<8 x half> addrspace(1)* %out, <8 x half> %vec, i32 %sel) { 363; GCN-LABEL: half8_inselt: 364; GCN: ; %bb.0: ; %entry 365; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 366; GCN-NEXT: s_load_dword s2, s[0:1], 0x44 
367; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 368; GCN-NEXT: v_mov_b32_e32 v0, 0x3c00 369; GCN-NEXT: s_waitcnt lgkmcnt(0) 370; GCN-NEXT: s_lshr_b32 s3, s7, 16 371; GCN-NEXT: s_cmp_lg_u32 s2, 7 372; GCN-NEXT: v_mov_b32_e32 v1, s3 373; GCN-NEXT: s_cselect_b64 vcc, -1, 0 374; GCN-NEXT: s_cmp_lg_u32 s2, 6 375; GCN-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 376; GCN-NEXT: v_mov_b32_e32 v2, s7 377; GCN-NEXT: s_cselect_b64 vcc, -1, 0 378; GCN-NEXT: s_lshr_b32 s3, s6, 16 379; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 380; GCN-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 381; GCN-NEXT: s_cmp_lg_u32 s2, 5 382; GCN-NEXT: v_or_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 383; GCN-NEXT: v_mov_b32_e32 v1, s3 384; GCN-NEXT: s_cselect_b64 vcc, -1, 0 385; GCN-NEXT: s_cmp_lg_u32 s2, 4 386; GCN-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 387; GCN-NEXT: v_mov_b32_e32 v2, s6 388; GCN-NEXT: s_cselect_b64 vcc, -1, 0 389; GCN-NEXT: s_lshr_b32 s3, s5, 16 390; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 391; GCN-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 392; GCN-NEXT: s_cmp_lg_u32 s2, 3 393; GCN-NEXT: v_or_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 394; GCN-NEXT: v_mov_b32_e32 v1, s3 395; GCN-NEXT: s_cselect_b64 vcc, -1, 0 396; GCN-NEXT: s_cmp_lg_u32 s2, 2 397; GCN-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 398; GCN-NEXT: v_mov_b32_e32 v4, s5 399; GCN-NEXT: s_cselect_b64 vcc, -1, 0 400; GCN-NEXT: s_lshr_b32 s3, s4, 16 401; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 402; GCN-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 403; GCN-NEXT: s_cmp_lg_u32 s2, 1 404; GCN-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 405; GCN-NEXT: v_mov_b32_e32 v4, s3 406; GCN-NEXT: s_cselect_b64 vcc, -1, 0 407; GCN-NEXT: s_cmp_lg_u32 s2, 0 408; GCN-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 409; GCN-NEXT: v_mov_b32_e32 v5, s4 410; GCN-NEXT: s_cselect_b64 vcc, -1, 0 411; GCN-NEXT: v_lshlrev_b32_e32 v4, 16, v4 412; 
GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 413; GCN-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 414; GCN-NEXT: v_mov_b32_e32 v5, s1 415; GCN-NEXT: v_mov_b32_e32 v4, s0 416; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 417; GCN-NEXT: s_endpgm 418entry: 419 %v = insertelement <8 x half> %vec, half 1.000000e+00, i32 %sel 420 store <8 x half> %v, <8 x half> addrspace(1)* %out 421 ret void 422} 423 424define amdgpu_kernel void @short2_inselt(<2 x i16> addrspace(1)* %out, <2 x i16> %vec, i32 %sel) { 425; GCN-LABEL: short2_inselt: 426; GCN: ; %bb.0: ; %entry 427; GCN-NEXT: s_load_dword s2, s[0:1], 0x30 428; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c 429; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 430; GCN-NEXT: s_waitcnt lgkmcnt(0) 431; GCN-NEXT: s_lshl_b32 s2, s2, 4 432; GCN-NEXT: s_lshl_b32 s2, 0xffff, s2 433; GCN-NEXT: s_andn2_b32 s3, s3, s2 434; GCN-NEXT: s_and_b32 s2, s2, 0x10001 435; GCN-NEXT: s_or_b32 s2, s2, s3 436; GCN-NEXT: v_mov_b32_e32 v0, s0 437; GCN-NEXT: v_mov_b32_e32 v1, s1 438; GCN-NEXT: v_mov_b32_e32 v2, s2 439; GCN-NEXT: flat_store_dword v[0:1], v2 440; GCN-NEXT: s_endpgm 441entry: 442 %v = insertelement <2 x i16> %vec, i16 1, i32 %sel 443 store <2 x i16> %v, <2 x i16> addrspace(1)* %out 444 ret void 445} 446 447define amdgpu_kernel void @short4_inselt(<4 x i16> addrspace(1)* %out, <4 x i16> %vec, i32 %sel) { 448; GCN-LABEL: short4_inselt: 449; GCN: ; %bb.0: ; %entry 450; GCN-NEXT: s_load_dword s6, s[0:1], 0x34 451; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 452; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 453; GCN-NEXT: s_mov_b64 s[4:5], 0xffff 454; GCN-NEXT: s_waitcnt lgkmcnt(0) 455; GCN-NEXT: s_lshl_b32 s6, s6, 4 456; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], s6 457; GCN-NEXT: s_mov_b32 s6, 0x10001 458; GCN-NEXT: s_mov_b32 s7, s6 459; GCN-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 460; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 461; GCN-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 462; GCN-NEXT: 
v_mov_b32_e32 v0, s2 463; GCN-NEXT: v_mov_b32_e32 v3, s1 464; GCN-NEXT: v_mov_b32_e32 v1, s3 465; GCN-NEXT: v_mov_b32_e32 v2, s0 466; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 467; GCN-NEXT: s_endpgm 468entry: 469 %v = insertelement <4 x i16> %vec, i16 1, i32 %sel 470 store <4 x i16> %v, <4 x i16> addrspace(1)* %out 471 ret void 472} 473 474define amdgpu_kernel void @byte8_inselt(<8 x i8> addrspace(1)* %out, <8 x i8> %vec, i32 %sel) { 475; GCN-LABEL: byte8_inselt: 476; GCN: ; %bb.0: ; %entry 477; GCN-NEXT: s_load_dword s6, s[0:1], 0x34 478; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 479; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 480; GCN-NEXT: s_mov_b64 s[4:5], 0xffff 481; GCN-NEXT: s_waitcnt lgkmcnt(0) 482; GCN-NEXT: s_lshl_b32 s6, s6, 3 483; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], s6 484; GCN-NEXT: s_mov_b32 s6, 0x1010101 485; GCN-NEXT: s_and_b32 s7, s5, s6 486; GCN-NEXT: s_and_b32 s6, s4, s6 487; GCN-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 488; GCN-NEXT: s_or_b64 s[0:1], s[6:7], s[0:1] 489; GCN-NEXT: v_mov_b32_e32 v2, s2 490; GCN-NEXT: v_mov_b32_e32 v0, s0 491; GCN-NEXT: v_mov_b32_e32 v1, s1 492; GCN-NEXT: v_mov_b32_e32 v3, s3 493; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 494; GCN-NEXT: s_endpgm 495entry: 496 %v = insertelement <8 x i8> %vec, i8 1, i32 %sel 497 store <8 x i8> %v, <8 x i8> addrspace(1)* %out 498 ret void 499} 500 501define amdgpu_kernel void @byte16_inselt(<16 x i8> addrspace(1)* %out, <16 x i8> %vec, i32 %sel) { 502; GCN-LABEL: byte16_inselt: 503; GCN: ; %bb.0: ; %entry 504; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 505; GCN-NEXT: s_load_dword s2, s[0:1], 0x44 506; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 507; GCN-NEXT: s_waitcnt lgkmcnt(0) 508; GCN-NEXT: s_lshr_b32 s3, s7, 24 509; GCN-NEXT: s_cmp_lg_u32 s2, 15 510; GCN-NEXT: v_mov_b32_e32 v0, s3 511; GCN-NEXT: s_cselect_b64 vcc, -1, 0 512; GCN-NEXT: s_lshr_b32 s3, s7, 16 513; GCN-NEXT: s_cmp_lg_u32 s2, 14 514; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc 515; GCN-NEXT: 
v_mov_b32_e32 v1, s3 516; GCN-NEXT: s_cselect_b64 vcc, -1, 0 517; GCN-NEXT: s_lshr_b32 s3, s7, 8 518; GCN-NEXT: v_lshlrev_b16_e32 v0, 8, v0 519; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 520; GCN-NEXT: s_cmp_lg_u32 s2, 13 521; GCN-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 522; GCN-NEXT: v_mov_b32_e32 v1, s3 523; GCN-NEXT: s_cselect_b64 vcc, -1, 0 524; GCN-NEXT: s_cmp_lg_u32 s2, 12 525; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 526; GCN-NEXT: v_mov_b32_e32 v2, s7 527; GCN-NEXT: s_cselect_b64 vcc, -1, 0 528; GCN-NEXT: v_lshlrev_b16_e32 v1, 8, v1 529; GCN-NEXT: v_cndmask_b32_e32 v2, 1, v2, vcc 530; GCN-NEXT: s_lshr_b32 s3, s6, 24 531; GCN-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 532; GCN-NEXT: s_cmp_lg_u32 s2, 11 533; GCN-NEXT: v_or_b32_sdwa v3, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 534; GCN-NEXT: v_mov_b32_e32 v0, s3 535; GCN-NEXT: s_cselect_b64 vcc, -1, 0 536; GCN-NEXT: s_lshr_b32 s3, s6, 16 537; GCN-NEXT: s_cmp_lg_u32 s2, 10 538; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc 539; GCN-NEXT: v_mov_b32_e32 v1, s3 540; GCN-NEXT: s_cselect_b64 vcc, -1, 0 541; GCN-NEXT: s_lshr_b32 s3, s6, 8 542; GCN-NEXT: v_lshlrev_b16_e32 v0, 8, v0 543; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 544; GCN-NEXT: s_cmp_lg_u32 s2, 9 545; GCN-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 546; GCN-NEXT: v_mov_b32_e32 v1, s3 547; GCN-NEXT: s_cselect_b64 vcc, -1, 0 548; GCN-NEXT: s_cmp_lg_u32 s2, 8 549; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 550; GCN-NEXT: v_mov_b32_e32 v2, s6 551; GCN-NEXT: s_cselect_b64 vcc, -1, 0 552; GCN-NEXT: v_lshlrev_b16_e32 v1, 8, v1 553; GCN-NEXT: v_cndmask_b32_e32 v2, 1, v2, vcc 554; GCN-NEXT: s_lshr_b32 s3, s5, 24 555; GCN-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 556; GCN-NEXT: s_cmp_lg_u32 s2, 7 557; 
GCN-NEXT: v_or_b32_sdwa v2, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 558; GCN-NEXT: v_mov_b32_e32 v0, s3 559; GCN-NEXT: s_cselect_b64 vcc, -1, 0 560; GCN-NEXT: s_lshr_b32 s3, s5, 16 561; GCN-NEXT: s_cmp_lg_u32 s2, 6 562; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc 563; GCN-NEXT: v_mov_b32_e32 v1, s3 564; GCN-NEXT: s_cselect_b64 vcc, -1, 0 565; GCN-NEXT: s_lshr_b32 s3, s5, 8 566; GCN-NEXT: v_lshlrev_b16_e32 v0, 8, v0 567; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 568; GCN-NEXT: s_cmp_lg_u32 s2, 5 569; GCN-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 570; GCN-NEXT: v_mov_b32_e32 v1, s3 571; GCN-NEXT: s_cselect_b64 vcc, -1, 0 572; GCN-NEXT: s_cmp_lg_u32 s2, 4 573; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 574; GCN-NEXT: v_mov_b32_e32 v4, s5 575; GCN-NEXT: s_cselect_b64 vcc, -1, 0 576; GCN-NEXT: v_lshlrev_b16_e32 v1, 8, v1 577; GCN-NEXT: v_cndmask_b32_e32 v4, 1, v4, vcc 578; GCN-NEXT: s_lshr_b32 s3, s4, 24 579; GCN-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 580; GCN-NEXT: s_cmp_lg_u32 s2, 3 581; GCN-NEXT: v_or_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 582; GCN-NEXT: v_mov_b32_e32 v0, s3 583; GCN-NEXT: s_cselect_b64 vcc, -1, 0 584; GCN-NEXT: s_lshr_b32 s3, s4, 16 585; GCN-NEXT: s_cmp_lg_u32 s2, 2 586; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc 587; GCN-NEXT: v_mov_b32_e32 v4, s3 588; GCN-NEXT: s_cselect_b64 vcc, -1, 0 589; GCN-NEXT: s_lshr_b32 s3, s4, 8 590; GCN-NEXT: v_lshlrev_b16_e32 v0, 8, v0 591; GCN-NEXT: v_cndmask_b32_e32 v4, 1, v4, vcc 592; GCN-NEXT: s_cmp_lg_u32 s2, 1 593; GCN-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 594; GCN-NEXT: v_mov_b32_e32 v4, s3 595; GCN-NEXT: s_cselect_b64 vcc, -1, 0 596; GCN-NEXT: s_cmp_lg_u32 s2, 0 597; GCN-NEXT: v_cndmask_b32_e32 v4, 1, v4, vcc 598; GCN-NEXT: v_mov_b32_e32 v5, s4 599; 
GCN-NEXT: s_cselect_b64 vcc, -1, 0 600; GCN-NEXT: v_lshlrev_b16_e32 v4, 8, v4 601; GCN-NEXT: v_cndmask_b32_e32 v5, 1, v5, vcc 602; GCN-NEXT: v_or_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 603; GCN-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 604; GCN-NEXT: v_mov_b32_e32 v5, s1 605; GCN-NEXT: v_mov_b32_e32 v4, s0 606; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 607; GCN-NEXT: s_endpgm 608entry: 609 %v = insertelement <16 x i8> %vec, i8 1, i32 %sel 610 store <16 x i8> %v, <16 x i8> addrspace(1)* %out 611 ret void 612} 613 614define amdgpu_kernel void @double2_inselt(<2 x double> addrspace(1)* %out, <2 x double> %vec, i32 %sel) { 615; GCN-LABEL: double2_inselt: 616; GCN: ; %bb.0: ; %entry 617; GCN-NEXT: s_load_dword s2, s[0:1], 0x44 618; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 619; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 620; GCN-NEXT: v_mov_b32_e32 v0, 0x3ff00000 621; GCN-NEXT: s_waitcnt lgkmcnt(0) 622; GCN-NEXT: s_cmp_eq_u32 s2, 1 623; GCN-NEXT: v_mov_b32_e32 v1, s7 624; GCN-NEXT: s_cselect_b64 vcc, -1, 0 625; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc 626; GCN-NEXT: v_mov_b32_e32 v1, s6 627; GCN-NEXT: s_cmp_eq_u32 s2, 0 628; GCN-NEXT: v_cndmask_b32_e64 v2, v1, 0, vcc 629; GCN-NEXT: v_mov_b32_e32 v1, s5 630; GCN-NEXT: s_cselect_b64 vcc, -1, 0 631; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 632; GCN-NEXT: v_mov_b32_e32 v0, s4 633; GCN-NEXT: v_mov_b32_e32 v5, s1 634; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 635; GCN-NEXT: v_mov_b32_e32 v4, s0 636; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 637; GCN-NEXT: s_endpgm 638entry: 639 %v = insertelement <2 x double> %vec, double 1.000000e+00, i32 %sel 640 store <2 x double> %v, <2 x double> addrspace(1)* %out 641 ret void 642} 643 644define amdgpu_kernel void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x double> %vec, i32 %sel) { 645; GCN-LABEL: double5_inselt: 646; GCN: ; %bb.0: ; %entry 647; GCN-NEXT: 
s_load_dword s12, s[0:1], 0xa4 648; GCN-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x84 649; GCN-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x24 650; GCN-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x64 651; GCN-NEXT: v_mov_b32_e32 v4, 0x3ff00000 652; GCN-NEXT: s_waitcnt lgkmcnt(0) 653; GCN-NEXT: s_cmp_eq_u32 s12, 4 654; GCN-NEXT: v_mov_b32_e32 v0, s9 655; GCN-NEXT: s_cselect_b64 vcc, -1, 0 656; GCN-NEXT: v_cndmask_b32_e32 v9, v0, v4, vcc 657; GCN-NEXT: v_mov_b32_e32 v0, s8 658; GCN-NEXT: s_cmp_eq_u32 s12, 1 659; GCN-NEXT: v_cndmask_b32_e64 v8, v0, 0, vcc 660; GCN-NEXT: v_mov_b32_e32 v0, s3 661; GCN-NEXT: s_cselect_b64 vcc, -1, 0 662; GCN-NEXT: v_cndmask_b32_e32 v3, v0, v4, vcc 663; GCN-NEXT: v_mov_b32_e32 v0, s2 664; GCN-NEXT: s_cmp_eq_u32 s12, 0 665; GCN-NEXT: v_cndmask_b32_e64 v2, v0, 0, vcc 666; GCN-NEXT: v_mov_b32_e32 v0, s1 667; GCN-NEXT: s_cselect_b64 vcc, -1, 0 668; GCN-NEXT: v_cndmask_b32_e32 v1, v0, v4, vcc 669; GCN-NEXT: v_mov_b32_e32 v0, s0 670; GCN-NEXT: s_cmp_eq_u32 s12, 3 671; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 672; GCN-NEXT: v_mov_b32_e32 v5, s7 673; GCN-NEXT: s_cselect_b64 vcc, -1, 0 674; GCN-NEXT: v_cndmask_b32_e32 v7, v5, v4, vcc 675; GCN-NEXT: v_mov_b32_e32 v5, s6 676; GCN-NEXT: s_cmp_eq_u32 s12, 2 677; GCN-NEXT: v_cndmask_b32_e64 v6, v5, 0, vcc 678; GCN-NEXT: s_cselect_b64 vcc, -1, 0 679; GCN-NEXT: s_add_u32 s0, s10, 16 680; GCN-NEXT: v_mov_b32_e32 v5, s5 681; GCN-NEXT: s_addc_u32 s1, s11, 0 682; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc 683; GCN-NEXT: v_mov_b32_e32 v4, s4 684; GCN-NEXT: v_mov_b32_e32 v11, s1 685; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc 686; GCN-NEXT: v_mov_b32_e32 v10, s0 687; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 688; GCN-NEXT: s_add_u32 s0, s10, 32 689; GCN-NEXT: v_mov_b32_e32 v4, s10 690; GCN-NEXT: v_mov_b32_e32 v5, s11 691; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 692; GCN-NEXT: s_addc_u32 s1, s11, 0 693; GCN-NEXT: v_mov_b32_e32 v0, s0 694; GCN-NEXT: v_mov_b32_e32 v1, s1 695; GCN-NEXT: flat_store_dwordx2 v[0:1], 
v[8:9] 696; GCN-NEXT: s_endpgm 697entry: 698 %v = insertelement <5 x double> %vec, double 1.000000e+00, i32 %sel 699 store <5 x double> %v, <5 x double> addrspace(1)* %out 700 ret void 701} 702 703define amdgpu_kernel void @double8_inselt(<8 x double> addrspace(1)* %out, <8 x double> %vec, i32 %sel) { 704; GCN-LABEL: double8_inselt: 705; GCN: ; %bb.0: ; %entry 706; GCN-NEXT: s_load_dword s2, s[0:1], 0xa4 707; GCN-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 708; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 709; GCN-NEXT: v_mov_b32_e32 v16, 0x3ff00000 710; GCN-NEXT: s_waitcnt lgkmcnt(0) 711; GCN-NEXT: s_lshl_b32 s2, s2, 1 712; GCN-NEXT: v_mov_b32_e32 v0, s4 713; GCN-NEXT: v_mov_b32_e32 v1, s5 714; GCN-NEXT: v_mov_b32_e32 v2, s6 715; GCN-NEXT: v_mov_b32_e32 v3, s7 716; GCN-NEXT: v_mov_b32_e32 v4, s8 717; GCN-NEXT: v_mov_b32_e32 v5, s9 718; GCN-NEXT: v_mov_b32_e32 v6, s10 719; GCN-NEXT: v_mov_b32_e32 v7, s11 720; GCN-NEXT: v_mov_b32_e32 v8, s12 721; GCN-NEXT: v_mov_b32_e32 v9, s13 722; GCN-NEXT: v_mov_b32_e32 v10, s14 723; GCN-NEXT: v_mov_b32_e32 v11, s15 724; GCN-NEXT: v_mov_b32_e32 v12, s16 725; GCN-NEXT: v_mov_b32_e32 v13, s17 726; GCN-NEXT: v_mov_b32_e32 v14, s18 727; GCN-NEXT: v_mov_b32_e32 v15, s19 728; GCN-NEXT: s_mov_b32 m0, s2 729; GCN-NEXT: s_add_u32 s2, s0, 48 730; GCN-NEXT: v_movreld_b32_e32 v0, 0 731; GCN-NEXT: s_addc_u32 s3, s1, 0 732; GCN-NEXT: v_movreld_b32_e32 v1, v16 733; GCN-NEXT: v_mov_b32_e32 v17, s3 734; GCN-NEXT: v_mov_b32_e32 v16, s2 735; GCN-NEXT: s_add_u32 s2, s0, 32 736; GCN-NEXT: s_addc_u32 s3, s1, 0 737; GCN-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 738; GCN-NEXT: s_nop 0 739; GCN-NEXT: v_mov_b32_e32 v13, s3 740; GCN-NEXT: v_mov_b32_e32 v12, s2 741; GCN-NEXT: s_add_u32 s2, s0, 16 742; GCN-NEXT: s_addc_u32 s3, s1, 0 743; GCN-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 744; GCN-NEXT: s_nop 0 745; GCN-NEXT: v_mov_b32_e32 v9, s3 746; GCN-NEXT: v_mov_b32_e32 v8, s2 747; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 748; GCN-NEXT: s_nop 0 749; 
GCN-NEXT: v_mov_b32_e32 v5, s1 750; GCN-NEXT: v_mov_b32_e32 v4, s0 751; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 752; GCN-NEXT: s_endpgm 753entry: 754 %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel 755 store <8 x double> %v, <8 x double> addrspace(1)* %out 756 ret void 757} 758 759define amdgpu_kernel void @double7_inselt(<7 x double> addrspace(1)* %out, <7 x double> %vec, i32 %sel) { 760; GCN-LABEL: double7_inselt: 761; GCN: ; %bb.0: ; %entry 762; GCN-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x64 763; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 764; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x94 765; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x84 766; GCN-NEXT: s_load_dword s0, s[0:1], 0xa4 767; GCN-NEXT: s_waitcnt lgkmcnt(0) 768; GCN-NEXT: v_mov_b32_e32 v0, s4 769; GCN-NEXT: v_mov_b32_e32 v1, s5 770; GCN-NEXT: v_mov_b32_e32 v2, s6 771; GCN-NEXT: v_mov_b32_e32 v3, s7 772; GCN-NEXT: s_lshl_b32 s0, s0, 1 773; GCN-NEXT: v_mov_b32_e32 v4, s8 774; GCN-NEXT: v_mov_b32_e32 v5, s9 775; GCN-NEXT: v_mov_b32_e32 v6, s10 776; GCN-NEXT: v_mov_b32_e32 v7, s11 777; GCN-NEXT: v_mov_b32_e32 v8, s12 778; GCN-NEXT: v_mov_b32_e32 v9, s13 779; GCN-NEXT: v_mov_b32_e32 v10, s14 780; GCN-NEXT: v_mov_b32_e32 v11, s15 781; GCN-NEXT: v_mov_b32_e32 v12, s16 782; GCN-NEXT: v_mov_b32_e32 v13, s17 783; GCN-NEXT: s_mov_b32 m0, s0 784; GCN-NEXT: v_movreld_b32_e32 v0, 0 785; GCN-NEXT: v_mov_b32_e32 v16, 0x3ff00000 786; GCN-NEXT: s_add_u32 s0, s2, 16 787; GCN-NEXT: v_movreld_b32_e32 v1, v16 788; GCN-NEXT: s_addc_u32 s1, s3, 0 789; GCN-NEXT: v_mov_b32_e32 v15, s1 790; GCN-NEXT: v_mov_b32_e32 v14, s0 791; GCN-NEXT: flat_store_dwordx4 v[14:15], v[4:7] 792; GCN-NEXT: s_add_u32 s0, s2, 48 793; GCN-NEXT: v_mov_b32_e32 v5, s3 794; GCN-NEXT: v_mov_b32_e32 v4, s2 795; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 796; GCN-NEXT: s_addc_u32 s1, s3, 0 797; GCN-NEXT: v_mov_b32_e32 v0, s0 798; GCN-NEXT: v_mov_b32_e32 v1, s1 799; GCN-NEXT: s_add_u32 s0, s2, 32 800; GCN-NEXT: 
flat_store_dwordx2 v[0:1], v[12:13] 801; GCN-NEXT: s_addc_u32 s1, s3, 0 802; GCN-NEXT: v_mov_b32_e32 v0, s0 803; GCN-NEXT: v_mov_b32_e32 v1, s1 804; GCN-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 805; GCN-NEXT: s_endpgm 806entry: 807 %v = insertelement <7 x double> %vec, double 1.000000e+00, i32 %sel 808 store <7 x double> %v, <7 x double> addrspace(1)* %out 809 ret void 810} 811 ; Test: insert double 1.0 into the <16 x double> kernel argument %vec at the run-time index %sel and store the resulting vector to %out. The expected code below copies the 32 loaded vector dwords into v0-v31, puts the loaded index shifted left by 1 into m0, writes the element with two v_movreld_b32 (0 via v0 for the low dword, 0x3ff00000 via v1 for the high dword), and then stores the vector with eight flat_store_dwordx4. The expectation lines are autogenerated by utils/update_llc_test_checks.py and should be regenerated rather than edited by hand. 812define amdgpu_kernel void @double16_inselt(<16 x double> addrspace(1)* %out, <16 x double> %vec, i32 %sel) { 813; GCN-LABEL: double16_inselt: 814; GCN: ; %bb.0: ; %entry 815; GCN-NEXT: s_load_dword s2, s[0:1], 0x124 816; GCN-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0xa4 817; GCN-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0xe4 818; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 819; GCN-NEXT: v_mov_b32_e32 v32, 0x3ff00000 820; GCN-NEXT: s_waitcnt lgkmcnt(0) 821; GCN-NEXT: v_mov_b32_e32 v0, s36 822; GCN-NEXT: s_lshl_b32 s2, s2, 1 823; GCN-NEXT: v_mov_b32_e32 v1, s37 824; GCN-NEXT: v_mov_b32_e32 v2, s38 825; GCN-NEXT: v_mov_b32_e32 v3, s39 826; GCN-NEXT: v_mov_b32_e32 v4, s40 827; GCN-NEXT: v_mov_b32_e32 v5, s41 828; GCN-NEXT: v_mov_b32_e32 v6, s42 829; GCN-NEXT: v_mov_b32_e32 v7, s43 830; GCN-NEXT: v_mov_b32_e32 v8, s44 831; GCN-NEXT: v_mov_b32_e32 v9, s45 832; GCN-NEXT: v_mov_b32_e32 v10, s46 833; GCN-NEXT: v_mov_b32_e32 v11, s47 834; GCN-NEXT: v_mov_b32_e32 v12, s48 835; GCN-NEXT: v_mov_b32_e32 v13, s49 836; GCN-NEXT: v_mov_b32_e32 v14, s50 837; GCN-NEXT: v_mov_b32_e32 v15, s51 838; GCN-NEXT: v_mov_b32_e32 v16, s4 839; GCN-NEXT: v_mov_b32_e32 v17, s5 840; GCN-NEXT: v_mov_b32_e32 v18, s6 841; GCN-NEXT: v_mov_b32_e32 v19, s7 842; GCN-NEXT: v_mov_b32_e32 v20, s8 843; GCN-NEXT: v_mov_b32_e32 v21, s9 844; GCN-NEXT: v_mov_b32_e32 v22, s10 845; GCN-NEXT: v_mov_b32_e32 v23, s11 846; GCN-NEXT: v_mov_b32_e32 v24, s12 847; GCN-NEXT: v_mov_b32_e32 v25, s13 848; GCN-NEXT: v_mov_b32_e32 v26, s14 849; GCN-NEXT: v_mov_b32_e32 v27, s15 850; GCN-NEXT: v_mov_b32_e32 v28, s16 851; GCN-NEXT: v_mov_b32_e32 v29, s17 
852; GCN-NEXT: v_mov_b32_e32 v30, s18 853; GCN-NEXT: v_mov_b32_e32 v31, s19 854; GCN-NEXT: s_mov_b32 m0, s2 855; GCN-NEXT: s_add_u32 s2, s0, 0x70 856; GCN-NEXT: v_movreld_b32_e32 v0, 0 857; GCN-NEXT: s_addc_u32 s3, s1, 0 858; GCN-NEXT: v_movreld_b32_e32 v1, v32 859; GCN-NEXT: v_mov_b32_e32 v33, s3 860; GCN-NEXT: v_mov_b32_e32 v32, s2 861; GCN-NEXT: s_add_u32 s2, s0, 0x60 862; GCN-NEXT: s_addc_u32 s3, s1, 0 863; GCN-NEXT: flat_store_dwordx4 v[32:33], v[28:31] 864; GCN-NEXT: s_nop 0 865; GCN-NEXT: v_mov_b32_e32 v29, s3 866; GCN-NEXT: v_mov_b32_e32 v28, s2 867; GCN-NEXT: s_add_u32 s2, s0, 0x50 868; GCN-NEXT: s_addc_u32 s3, s1, 0 869; GCN-NEXT: flat_store_dwordx4 v[28:29], v[24:27] 870; GCN-NEXT: s_nop 0 871; GCN-NEXT: v_mov_b32_e32 v25, s3 872; GCN-NEXT: v_mov_b32_e32 v24, s2 873; GCN-NEXT: s_add_u32 s2, s0, 64 874; GCN-NEXT: s_addc_u32 s3, s1, 0 875; GCN-NEXT: flat_store_dwordx4 v[24:25], v[20:23] 876; GCN-NEXT: s_nop 0 877; GCN-NEXT: v_mov_b32_e32 v21, s3 878; GCN-NEXT: v_mov_b32_e32 v20, s2 879; GCN-NEXT: s_add_u32 s2, s0, 48 880; GCN-NEXT: s_addc_u32 s3, s1, 0 881; GCN-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 882; GCN-NEXT: s_nop 0 883; GCN-NEXT: v_mov_b32_e32 v17, s3 884; GCN-NEXT: v_mov_b32_e32 v16, s2 885; GCN-NEXT: s_add_u32 s2, s0, 32 886; GCN-NEXT: s_addc_u32 s3, s1, 0 887; GCN-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 888; GCN-NEXT: s_nop 0 889; GCN-NEXT: v_mov_b32_e32 v13, s3 890; GCN-NEXT: v_mov_b32_e32 v12, s2 891; GCN-NEXT: s_add_u32 s2, s0, 16 892; GCN-NEXT: s_addc_u32 s3, s1, 0 893; GCN-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 894; GCN-NEXT: s_nop 0 895; GCN-NEXT: v_mov_b32_e32 v9, s3 896; GCN-NEXT: v_mov_b32_e32 v8, s2 897; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 898; GCN-NEXT: s_nop 0 899; GCN-NEXT: v_mov_b32_e32 v5, s1 900; GCN-NEXT: v_mov_b32_e32 v4, s0 901; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 902; GCN-NEXT: s_endpgm 903entry: 904 %v = insertelement <16 x double> %vec, double 1.000000e+00, i32 %sel 905 store <16 x double> %v, 
<16 x double> addrspace(1)* %out 906 ret void 907} 908 ; Test: insert double 1.0 into the <15 x double> kernel argument %vec at the run-time index %sel and store the resulting vector to %out. The expected code below copies the 30 loaded vector dwords into v0-v29, puts the loaded index shifted left by 1 into m0, and writes the element with two v_movreld_b32 (0 via v0 for the low dword, 0x3ff00000 via v1 for the high dword). Because the vector is not a multiple of four dwords, the result is written with seven flat_store_dwordx4 plus one flat_store_dwordx2 at byte offset 0x70. The expectation lines are autogenerated by utils/update_llc_test_checks.py and should be regenerated rather than edited by hand. 909define amdgpu_kernel void @double15_inselt(<15 x double> addrspace(1)* %out, <15 x double> %vec, i32 %sel) { 910; GCN-LABEL: double15_inselt: 911; GCN: ; %bb.0: ; %entry 912; GCN-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0xa4 913; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x114 914; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x104 915; GCN-NEXT: s_load_dwordx8 s[24:31], s[0:1], 0xe4 916; GCN-NEXT: v_mov_b32_e32 v32, 0x3ff00000 917; GCN-NEXT: s_waitcnt lgkmcnt(0) 918; GCN-NEXT: v_mov_b32_e32 v0, s8 919; GCN-NEXT: v_mov_b32_e32 v28, s2 920; GCN-NEXT: v_mov_b32_e32 v24, s4 921; GCN-NEXT: s_load_dword s4, s[0:1], 0x124 922; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 923; GCN-NEXT: v_mov_b32_e32 v1, s9 924; GCN-NEXT: v_mov_b32_e32 v2, s10 925; GCN-NEXT: v_mov_b32_e32 v3, s11 926; GCN-NEXT: s_waitcnt lgkmcnt(0) 927; GCN-NEXT: s_lshl_b32 s2, s4, 1 928; GCN-NEXT: v_mov_b32_e32 v4, s12 929; GCN-NEXT: v_mov_b32_e32 v5, s13 930; GCN-NEXT: v_mov_b32_e32 v6, s14 931; GCN-NEXT: v_mov_b32_e32 v7, s15 932; GCN-NEXT: v_mov_b32_e32 v8, s16 933; GCN-NEXT: v_mov_b32_e32 v9, s17 934; GCN-NEXT: v_mov_b32_e32 v10, s18 935; GCN-NEXT: v_mov_b32_e32 v11, s19 936; GCN-NEXT: v_mov_b32_e32 v12, s20 937; GCN-NEXT: v_mov_b32_e32 v13, s21 938; GCN-NEXT: v_mov_b32_e32 v14, s22 939; GCN-NEXT: v_mov_b32_e32 v15, s23 940; GCN-NEXT: v_mov_b32_e32 v16, s24 941; GCN-NEXT: v_mov_b32_e32 v17, s25 942; GCN-NEXT: v_mov_b32_e32 v18, s26 943; GCN-NEXT: v_mov_b32_e32 v19, s27 944; GCN-NEXT: v_mov_b32_e32 v20, s28 945; GCN-NEXT: v_mov_b32_e32 v21, s29 946; GCN-NEXT: v_mov_b32_e32 v22, s30 947; GCN-NEXT: v_mov_b32_e32 v23, s31 948; GCN-NEXT: v_mov_b32_e32 v25, s5 949; GCN-NEXT: v_mov_b32_e32 v26, s6 950; GCN-NEXT: v_mov_b32_e32 v27, s7 951; GCN-NEXT: v_mov_b32_e32 v29, s3 952; GCN-NEXT: s_mov_b32 m0, s2 953; GCN-NEXT: v_movreld_b32_e32 v0, 0 954; GCN-NEXT: s_add_u32 s2, s0, 0x50 955; GCN-NEXT: v_movreld_b32_e32 v1, v32 956; GCN-NEXT: s_addc_u32 s3, s1, 0 
957; GCN-NEXT: v_mov_b32_e32 v31, s3 958; GCN-NEXT: v_mov_b32_e32 v30, s2 959; GCN-NEXT: s_add_u32 s2, s0, 64 960; GCN-NEXT: s_addc_u32 s3, s1, 0 961; GCN-NEXT: flat_store_dwordx4 v[30:31], v[20:23] 962; GCN-NEXT: s_nop 0 963; GCN-NEXT: v_mov_b32_e32 v21, s3 964; GCN-NEXT: v_mov_b32_e32 v20, s2 965; GCN-NEXT: s_add_u32 s2, s0, 48 966; GCN-NEXT: s_addc_u32 s3, s1, 0 967; GCN-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 968; GCN-NEXT: s_nop 0 969; GCN-NEXT: v_mov_b32_e32 v17, s3 970; GCN-NEXT: v_mov_b32_e32 v16, s2 971; GCN-NEXT: s_add_u32 s2, s0, 32 972; GCN-NEXT: s_addc_u32 s3, s1, 0 973; GCN-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 974; GCN-NEXT: s_nop 0 975; GCN-NEXT: v_mov_b32_e32 v13, s3 976; GCN-NEXT: v_mov_b32_e32 v12, s2 977; GCN-NEXT: s_add_u32 s2, s0, 16 978; GCN-NEXT: s_addc_u32 s3, s1, 0 979; GCN-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 980; GCN-NEXT: s_nop 0 981; GCN-NEXT: v_mov_b32_e32 v9, s3 982; GCN-NEXT: v_mov_b32_e32 v8, s2 983; GCN-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 984; GCN-NEXT: s_add_u32 s2, s0, 0x70 985; GCN-NEXT: v_mov_b32_e32 v5, s1 986; GCN-NEXT: v_mov_b32_e32 v4, s0 987; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 988; GCN-NEXT: s_addc_u32 s3, s1, 0 989; GCN-NEXT: v_mov_b32_e32 v0, s2 990; GCN-NEXT: v_mov_b32_e32 v1, s3 991; GCN-NEXT: s_add_u32 s0, s0, 0x60 992; GCN-NEXT: flat_store_dwordx2 v[0:1], v[28:29] 993; GCN-NEXT: s_addc_u32 s1, s1, 0 994; GCN-NEXT: v_mov_b32_e32 v0, s0 995; GCN-NEXT: v_mov_b32_e32 v1, s1 996; GCN-NEXT: flat_store_dwordx4 v[0:1], v[24:27] 997; GCN-NEXT: s_endpgm 998entry: 999 %v = insertelement <15 x double> %vec, double 1.000000e+00, i32 %sel 1000 store <15 x double> %v, <15 x double> addrspace(1)* %out 1001 ret void 1002} 1003 ; Test: insert i1 1 into the <4 x i1> kernel argument %vec at the run-time index %sel and store the resulting vector to %out. The expected code below goes through scratch memory: it sets up the scratch resource descriptor in s[4:7], stores the four extracted bits as bytes at scratch offsets 4-7, stores a byte of 1 at the offen address formed by masking the loaded index with 3 and OR-ing it with 4, then reloads the four bytes, repacks them into a 4-bit value and writes it with flat_store_byte. The expectation lines are autogenerated by utils/update_llc_test_checks.py and should be regenerated rather than edited by hand. 1004define amdgpu_kernel void @bit4_inselt(<4 x i1> addrspace(1)* %out, <4 x i1> %vec, i32 %sel) { 1005; GCN-LABEL: bit4_inselt: 1006; GCN: ; %bb.0: ; %entry 1007; GCN-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 1008; GCN-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 1009; GCN-NEXT: 
s_mov_b32 s6, -1 1010; GCN-NEXT: s_mov_b32 s7, 0xe80000 1011; GCN-NEXT: s_add_u32 s4, s4, s3 1012; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c 1013; GCN-NEXT: s_addc_u32 s5, s5, 0 1014; GCN-NEXT: v_mov_b32_e32 v0, 4 1015; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1016; GCN-NEXT: s_waitcnt lgkmcnt(0) 1017; GCN-NEXT: s_and_b32 s3, s3, 3 1018; GCN-NEXT: v_mov_b32_e32 v1, s2 1019; GCN-NEXT: v_lshrrev_b16_e64 v2, 1, s2 1020; GCN-NEXT: v_lshrrev_b16_e64 v3, 2, s2 1021; GCN-NEXT: v_lshrrev_b16_e64 v4, 3, s2 1022; GCN-NEXT: v_or_b32_e32 v0, s3, v0 1023; GCN-NEXT: v_and_b32_e32 v2, 1, v2 1024; GCN-NEXT: v_and_b32_e32 v3, 3, v3 1025; GCN-NEXT: v_and_b32_e32 v4, 1, v4 1026; GCN-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:4 1027; GCN-NEXT: buffer_store_byte v4, off, s[4:7], 0 offset:7 1028; GCN-NEXT: buffer_store_byte v3, off, s[4:7], 0 offset:6 1029; GCN-NEXT: buffer_store_byte v2, off, s[4:7], 0 offset:5 1030; GCN-NEXT: v_mov_b32_e32 v1, 1 1031; GCN-NEXT: buffer_store_byte v1, v0, s[4:7], 0 offen 1032; GCN-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 offset:4 1033; GCN-NEXT: buffer_load_ubyte v1, off, s[4:7], 0 offset:5 1034; GCN-NEXT: buffer_load_ubyte v2, off, s[4:7], 0 offset:6 1035; GCN-NEXT: buffer_load_ubyte v3, off, s[4:7], 0 offset:7 1036; GCN-NEXT: s_waitcnt vmcnt(3) 1037; GCN-NEXT: v_and_b32_e32 v0, 1, v0 1038; GCN-NEXT: s_waitcnt vmcnt(2) 1039; GCN-NEXT: v_and_b32_e32 v1, 1, v1 1040; GCN-NEXT: s_waitcnt vmcnt(1) 1041; GCN-NEXT: v_and_b32_e32 v2, 1, v2 1042; GCN-NEXT: v_lshlrev_b16_e32 v1, 1, v1 1043; GCN-NEXT: v_lshlrev_b16_e32 v2, 2, v2 1044; GCN-NEXT: v_or_b32_e32 v0, v0, v1 1045; GCN-NEXT: s_waitcnt vmcnt(0) 1046; GCN-NEXT: v_lshlrev_b16_e32 v3, 3, v3 1047; GCN-NEXT: v_or_b32_e32 v0, v0, v2 1048; GCN-NEXT: v_or_b32_e32 v0, v0, v3 1049; GCN-NEXT: v_and_b32_e32 v2, 15, v0 1050; GCN-NEXT: v_mov_b32_e32 v0, s0 1051; GCN-NEXT: v_mov_b32_e32 v1, s1 1052; GCN-NEXT: flat_store_byte v[0:1], v2 1053; GCN-NEXT: s_endpgm 1054entry: 1055 %v = insertelement <4 x 
i1> %vec, i1 1, i32 %sel 1056 store <4 x i1> %v, <4 x i1> addrspace(1)* %out 1057 ret void 1058} 1059 1060define amdgpu_kernel void @bit128_inselt(<128 x i1> addrspace(1)* %out, <128 x i1> %vec, i32 %sel) { 1061; GCN-LABEL: bit128_inselt: 1062; GCN: ; %bb.0: ; %entry 1063; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 1064; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 1065; GCN-NEXT: s_load_dword s0, s[0:1], 0x44 1066; GCN-NEXT: s_waitcnt lgkmcnt(0) 1067; GCN-NEXT: s_lshr_b32 s1, s4, 24 1068; GCN-NEXT: s_lshr_b32 s8, s4, 16 1069; GCN-NEXT: s_lshr_b32 s9, s4, 17 1070; GCN-NEXT: s_lshr_b32 s10, s4, 18 1071; GCN-NEXT: s_lshr_b32 s11, s4, 19 1072; GCN-NEXT: s_lshr_b32 s12, s4, 20 1073; GCN-NEXT: s_lshr_b32 s13, s4, 21 1074; GCN-NEXT: s_lshr_b32 s14, s4, 22 1075; GCN-NEXT: s_lshr_b32 s15, s4, 23 1076; GCN-NEXT: s_lshr_b32 s16, s5, 24 1077; GCN-NEXT: s_lshr_b32 s17, s5, 16 1078; GCN-NEXT: s_lshr_b32 s18, s5, 17 1079; GCN-NEXT: s_lshr_b32 s19, s5, 18 1080; GCN-NEXT: s_lshr_b32 s20, s5, 19 1081; GCN-NEXT: s_lshr_b32 s21, s5, 20 1082; GCN-NEXT: s_lshr_b32 s22, s5, 21 1083; GCN-NEXT: s_lshr_b32 s23, s5, 22 1084; GCN-NEXT: s_lshr_b32 s24, s5, 23 1085; GCN-NEXT: s_lshr_b32 s25, s6, 24 1086; GCN-NEXT: s_lshr_b32 s26, s6, 16 1087; GCN-NEXT: s_lshr_b32 s27, s6, 17 1088; GCN-NEXT: s_lshr_b32 s28, s6, 18 1089; GCN-NEXT: s_lshr_b32 s29, s6, 19 1090; GCN-NEXT: s_lshr_b32 s30, s6, 20 1091; GCN-NEXT: s_lshr_b32 s31, s6, 21 1092; GCN-NEXT: s_lshr_b32 s33, s6, 22 1093; GCN-NEXT: s_lshr_b32 s34, s6, 23 1094; GCN-NEXT: s_lshr_b32 s35, s7, 24 1095; GCN-NEXT: s_lshr_b32 s36, s7, 16 1096; GCN-NEXT: s_lshr_b32 s37, s7, 17 1097; GCN-NEXT: s_lshr_b32 s38, s7, 18 1098; GCN-NEXT: s_lshr_b32 s39, s7, 19 1099; GCN-NEXT: s_lshr_b32 s40, s7, 20 1100; GCN-NEXT: s_lshr_b32 s41, s7, 21 1101; GCN-NEXT: s_lshr_b32 s42, s7, 22 1102; GCN-NEXT: s_lshr_b32 s43, s7, 23 1103; GCN-NEXT: s_cmpk_lg_i32 s0, 0x77 1104; GCN-NEXT: v_mov_b32_e32 v16, s43 1105; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1106; GCN-NEXT: 
s_cmpk_lg_i32 s0, 0x76 1107; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1108; GCN-NEXT: v_mov_b32_e32 v17, s42 1109; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1110; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1111; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1112; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1113; GCN-NEXT: s_cmpk_lg_i32 s0, 0x75 1114; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1115; GCN-NEXT: v_mov_b32_e32 v17, s41 1116; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1117; GCN-NEXT: s_cmpk_lg_i32 s0, 0x74 1118; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1119; GCN-NEXT: v_mov_b32_e32 v18, s40 1120; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1121; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1122; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1123; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1124; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1125; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1126; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1127; GCN-NEXT: s_cmpk_lg_i32 s0, 0x73 1128; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1129; GCN-NEXT: v_mov_b32_e32 v17, s39 1130; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1131; GCN-NEXT: s_cmpk_lg_i32 s0, 0x72 1132; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1133; GCN-NEXT: v_mov_b32_e32 v18, s38 1134; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1135; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1136; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1137; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1138; GCN-NEXT: s_cmpk_lg_i32 s0, 0x71 1139; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1140; GCN-NEXT: v_mov_b32_e32 v18, s37 1141; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1142; GCN-NEXT: s_cmpk_lg_i32 s0, 0x70 1143; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1144; GCN-NEXT: v_mov_b32_e32 v19, s36 1145; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1146; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1147; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1148; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1149; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1150; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1151; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1152; GCN-NEXT: v_or_b32_e32 
v17, v18, v17 1153; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 1154; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1155; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7f 1156; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1157; GCN-NEXT: v_lshrrev_b16_e64 v17, 7, s35 1158; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1159; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7e 1160; GCN-NEXT: v_lshrrev_b16_e64 v18, 6, s35 1161; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1162; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1163; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1164; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1165; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1166; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7d 1167; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1168; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s35 1169; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1170; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7c 1171; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s35 1172; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1173; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1174; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1175; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1176; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1177; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1178; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1179; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1180; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7b 1181; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1182; GCN-NEXT: v_lshrrev_b16_e64 v18, 3, s35 1183; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1184; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7a 1185; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s35 1186; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1187; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1188; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1189; GCN-NEXT: s_cmpk_lg_i32 s0, 0x78 1190; GCN-NEXT: v_mov_b32_e32 v14, s35 1191; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1192; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1193; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1194; GCN-NEXT: s_cmpk_lg_i32 s0, 0x79 1195; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1196; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s35 1197; GCN-NEXT: v_cndmask_b32_e32 v14, 1, v14, vcc 1198; 
GCN-NEXT: s_cselect_b64 vcc, -1, 0 1199; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1200; GCN-NEXT: v_and_b32_e32 v14, 1, v14 1201; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1202; GCN-NEXT: v_or_b32_e32 v14, v14, v19 1203; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 1204; GCN-NEXT: v_and_b32_e32 v14, 3, v14 1205; GCN-NEXT: v_or_b32_e32 v14, v14, v18 1206; GCN-NEXT: v_lshlrev_b16_e32 v17, 4, v17 1207; GCN-NEXT: v_and_b32_e32 v14, 15, v14 1208; GCN-NEXT: v_or_b32_sdwa v14, v14, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1209; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6f 1210; GCN-NEXT: v_or_b32_sdwa v14, v16, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1211; GCN-NEXT: v_lshrrev_b16_e64 v16, 15, s7 1212; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1213; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6e 1214; GCN-NEXT: v_lshrrev_b16_e64 v17, 14, s7 1215; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1216; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1217; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1218; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1219; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1220; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6d 1221; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1222; GCN-NEXT: v_lshrrev_b16_e64 v17, 13, s7 1223; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1224; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6c 1225; GCN-NEXT: v_lshrrev_b16_e64 v18, 12, s7 1226; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1227; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1228; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1229; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1230; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1231; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1232; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1233; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1234; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6b 1235; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1236; GCN-NEXT: v_lshrrev_b16_e64 v17, 11, s7 1237; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1238; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6a 1239; GCN-NEXT: v_lshrrev_b16_e64 v18, 10, s7 1240; GCN-NEXT: 
v_cndmask_b32_e32 v17, 1, v17, vcc 1241; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1242; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1243; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1244; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1245; GCN-NEXT: s_cmpk_lg_i32 s0, 0x69 1246; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1247; GCN-NEXT: v_lshrrev_b16_e64 v18, 9, s7 1248; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1249; GCN-NEXT: s_cmpk_lg_i32 s0, 0x68 1250; GCN-NEXT: v_lshrrev_b16_e64 v19, 8, s7 1251; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1252; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1253; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1254; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1255; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1256; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1257; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1258; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1259; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1260; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 1261; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1262; GCN-NEXT: s_cmpk_lg_i32 s0, 0x67 1263; GCN-NEXT: v_or_b32_sdwa v16, v17, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1264; GCN-NEXT: v_lshrrev_b16_e64 v17, 7, s7 1265; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1266; GCN-NEXT: s_cmpk_lg_i32 s0, 0x66 1267; GCN-NEXT: v_lshrrev_b16_e64 v18, 6, s7 1268; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1269; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1270; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1271; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1272; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1273; GCN-NEXT: s_cmpk_lg_i32 s0, 0x65 1274; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1275; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s7 1276; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1277; GCN-NEXT: s_cmpk_lg_i32 s0, 0x64 1278; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s7 1279; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1280; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1281; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1282; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1283; GCN-NEXT: v_and_b32_e32 v19, 1, v19 
1284; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1285; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1286; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1287; GCN-NEXT: s_cmpk_lg_i32 s0, 0x63 1288; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1289; GCN-NEXT: v_lshrrev_b16_e64 v18, 3, s7 1290; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1291; GCN-NEXT: s_cmpk_lg_i32 s0, 0x62 1292; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s7 1293; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1294; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1295; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1296; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1297; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1298; GCN-NEXT: s_cmpk_lg_i32 s0, 0x61 1299; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1300; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s7 1301; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1302; GCN-NEXT: s_cmpk_lg_i32 s0, 0x60 1303; GCN-NEXT: v_mov_b32_e32 v15, s7 1304; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1305; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1306; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1307; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1308; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1309; GCN-NEXT: v_or_b32_e32 v15, v15, v19 1310; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 1311; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1312; GCN-NEXT: v_or_b32_e32 v15, v15, v18 1313; GCN-NEXT: v_lshlrev_b16_e32 v17, 4, v17 1314; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1315; GCN-NEXT: v_or_b32_e32 v15, v15, v17 1316; GCN-NEXT: s_cmpk_lg_i32 s0, 0x57 1317; GCN-NEXT: v_or_b32_sdwa v15, v15, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1318; GCN-NEXT: v_mov_b32_e32 v16, s34 1319; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1320; GCN-NEXT: s_cmpk_lg_i32 s0, 0x56 1321; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1322; GCN-NEXT: v_mov_b32_e32 v17, s33 1323; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1324; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1325; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1326; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1327; GCN-NEXT: s_cmpk_lg_i32 s0, 0x55 1328; GCN-NEXT: 
v_or_b32_e32 v16, v17, v16 1329; GCN-NEXT: v_mov_b32_e32 v17, s31 1330; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1331; GCN-NEXT: s_cmpk_lg_i32 s0, 0x54 1332; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1333; GCN-NEXT: v_mov_b32_e32 v18, s30 1334; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1335; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1336; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1337; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1338; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1339; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1340; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1341; GCN-NEXT: s_cmpk_lg_i32 s0, 0x53 1342; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1343; GCN-NEXT: v_mov_b32_e32 v17, s29 1344; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1345; GCN-NEXT: s_cmpk_lg_i32 s0, 0x52 1346; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1347; GCN-NEXT: v_mov_b32_e32 v18, s28 1348; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1349; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1350; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1351; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1352; GCN-NEXT: s_cmpk_lg_i32 s0, 0x51 1353; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1354; GCN-NEXT: v_mov_b32_e32 v18, s27 1355; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1356; GCN-NEXT: s_cmpk_lg_i32 s0, 0x50 1357; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1358; GCN-NEXT: v_mov_b32_e32 v19, s26 1359; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1360; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1361; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1362; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1363; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1364; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1365; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1366; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1367; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 1368; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1369; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5f 1370; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1371; GCN-NEXT: v_lshrrev_b16_e64 v17, 7, s25 1372; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1373; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5e 1374; GCN-NEXT: v_lshrrev_b16_e64 v18, 
6, s25 1375; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1376; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1377; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1378; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1379; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1380; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5d 1381; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1382; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s25 1383; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1384; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5c 1385; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s25 1386; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1387; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1388; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1389; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1390; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1391; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1392; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1393; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1394; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5b 1395; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1396; GCN-NEXT: v_lshrrev_b16_e64 v18, 3, s25 1397; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1398; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5a 1399; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s25 1400; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1401; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1402; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1403; GCN-NEXT: s_cmpk_lg_i32 s0, 0x58 1404; GCN-NEXT: v_mov_b32_e32 v3, s25 1405; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1406; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1407; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1408; GCN-NEXT: s_cmpk_lg_i32 s0, 0x59 1409; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1410; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s25 1411; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc 1412; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1413; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1414; GCN-NEXT: v_and_b32_e32 v3, 1, v3 1415; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1416; GCN-NEXT: v_or_b32_e32 v3, v3, v19 1417; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 1418; GCN-NEXT: v_and_b32_e32 v3, 3, v3 1419; GCN-NEXT: v_or_b32_e32 v3, v3, v18 1420; GCN-NEXT: 
v_lshlrev_b16_e32 v17, 4, v17 1421; GCN-NEXT: v_and_b32_e32 v3, 15, v3 1422; GCN-NEXT: v_or_b32_sdwa v3, v3, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1423; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4f 1424; GCN-NEXT: v_or_b32_sdwa v16, v16, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1425; GCN-NEXT: v_lshrrev_b16_e64 v3, 15, s6 1426; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1427; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4e 1428; GCN-NEXT: v_lshrrev_b16_e64 v17, 14, s6 1429; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc 1430; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1431; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1432; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1433; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1434; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4d 1435; GCN-NEXT: v_or_b32_e32 v3, v17, v3 1436; GCN-NEXT: v_lshrrev_b16_e64 v17, 13, s6 1437; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1438; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4c 1439; GCN-NEXT: v_lshrrev_b16_e64 v18, 12, s6 1440; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1441; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1442; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1443; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1444; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1445; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1446; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 1447; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1448; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4b 1449; GCN-NEXT: v_or_b32_e32 v3, v17, v3 1450; GCN-NEXT: v_lshrrev_b16_e64 v17, 11, s6 1451; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1452; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4a 1453; GCN-NEXT: v_lshrrev_b16_e64 v18, 10, s6 1454; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1455; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1456; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1457; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1458; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1459; GCN-NEXT: s_cmpk_lg_i32 s0, 0x49 1460; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1461; GCN-NEXT: v_lshrrev_b16_e64 v18, 9, s6 1462; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1463; 
GCN-NEXT: s_cmpk_lg_i32 s0, 0x48 1464; GCN-NEXT: v_lshrrev_b16_e64 v19, 8, s6 1465; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1466; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1467; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1468; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1469; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1470; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1471; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1472; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1473; GCN-NEXT: v_or_b32_e32 v17, v18, v17 1474; GCN-NEXT: v_lshlrev_b16_e32 v3, 4, v3 1475; GCN-NEXT: v_and_b32_e32 v17, 15, v17 1476; GCN-NEXT: s_cmpk_lg_i32 s0, 0x47 1477; GCN-NEXT: v_or_b32_sdwa v17, v17, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1478; GCN-NEXT: v_lshrrev_b16_e64 v3, 7, s6 1479; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1480; GCN-NEXT: s_cmpk_lg_i32 s0, 0x46 1481; GCN-NEXT: v_lshrrev_b16_e64 v18, 6, s6 1482; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc 1483; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1484; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1485; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1486; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1487; GCN-NEXT: s_cmpk_lg_i32 s0, 0x45 1488; GCN-NEXT: v_or_b32_e32 v3, v18, v3 1489; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s6 1490; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1491; GCN-NEXT: s_cmpk_lg_i32 s0, 0x44 1492; GCN-NEXT: v_lshrrev_b16_e64 v19, 4, s6 1493; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1494; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1495; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1496; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1497; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1498; GCN-NEXT: v_or_b32_e32 v18, v19, v18 1499; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 1500; GCN-NEXT: v_and_b32_e32 v18, 3, v18 1501; GCN-NEXT: s_cmpk_lg_i32 s0, 0x43 1502; GCN-NEXT: v_or_b32_e32 v18, v18, v3 1503; GCN-NEXT: v_lshrrev_b16_e64 v3, 3, s6 1504; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1505; GCN-NEXT: s_cmpk_lg_i32 s0, 0x42 1506; GCN-NEXT: v_lshrrev_b16_e64 v19, 2, s6 1507; GCN-NEXT: 
v_cndmask_b32_e32 v3, 1, v3, vcc 1508; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1509; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1510; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1511; GCN-NEXT: v_and_b32_e32 v19, 1, v19 1512; GCN-NEXT: s_cmpk_lg_i32 s0, 0x41 1513; GCN-NEXT: v_or_b32_e32 v3, v19, v3 1514; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s6 1515; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1516; GCN-NEXT: s_cmp_lg_u32 s0, 64 1517; GCN-NEXT: v_mov_b32_e32 v2, s6 1518; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc 1519; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1520; GCN-NEXT: v_cndmask_b32_e32 v2, 1, v2, vcc 1521; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 1522; GCN-NEXT: v_and_b32_e32 v2, 1, v2 1523; GCN-NEXT: v_or_b32_e32 v2, v2, v19 1524; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 1525; GCN-NEXT: v_and_b32_e32 v2, 3, v2 1526; GCN-NEXT: v_or_b32_e32 v2, v2, v3 1527; GCN-NEXT: v_or_b32_sdwa v3, v15, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1528; GCN-NEXT: v_lshlrev_b16_e32 v14, 4, v18 1529; GCN-NEXT: v_and_b32_e32 v2, 15, v2 1530; GCN-NEXT: s_cmp_lg_u32 s0, 55 1531; GCN-NEXT: v_or_b32_e32 v2, v2, v14 1532; GCN-NEXT: v_mov_b32_e32 v14, s24 1533; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1534; GCN-NEXT: s_cmp_lg_u32 s0, 54 1535; GCN-NEXT: v_cndmask_b32_e32 v14, 1, v14, vcc 1536; GCN-NEXT: v_mov_b32_e32 v15, s23 1537; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1538; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1539; GCN-NEXT: v_lshlrev_b16_e32 v14, 1, v14 1540; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1541; GCN-NEXT: s_cmp_lg_u32 s0, 53 1542; GCN-NEXT: v_or_b32_sdwa v2, v2, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1543; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1544; GCN-NEXT: v_mov_b32_e32 v15, s22 1545; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1546; GCN-NEXT: s_cmp_lg_u32 s0, 52 1547; GCN-NEXT: v_or_b32_sdwa v2, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1548; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1549; GCN-NEXT: v_mov_b32_e32 
v16, s21 1550; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1551; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1552; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1553; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1554; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1555; GCN-NEXT: v_lshlrev_b16_e32 v14, 2, v14 1556; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1557; GCN-NEXT: s_cmp_lg_u32 s0, 51 1558; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1559; GCN-NEXT: v_mov_b32_e32 v15, s20 1560; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1561; GCN-NEXT: s_cmp_lg_u32 s0, 50 1562; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1563; GCN-NEXT: v_mov_b32_e32 v16, s19 1564; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1565; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1566; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1567; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1568; GCN-NEXT: s_cmp_lg_u32 s0, 49 1569; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1570; GCN-NEXT: v_mov_b32_e32 v16, s18 1571; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1572; GCN-NEXT: s_cmp_lg_u32 s0, 48 1573; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1574; GCN-NEXT: v_mov_b32_e32 v17, s17 1575; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1576; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1577; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1578; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1579; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1580; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1581; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1582; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1583; GCN-NEXT: v_lshlrev_b16_e32 v14, 4, v14 1584; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1585; GCN-NEXT: s_cmp_lg_u32 s0, 63 1586; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1587; GCN-NEXT: v_lshrrev_b16_e64 v15, 7, s16 1588; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1589; GCN-NEXT: s_cmp_lg_u32 s0, 62 1590; GCN-NEXT: v_lshrrev_b16_e64 v16, 6, s16 1591; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1592; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1593; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1594; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1595; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1596; 
GCN-NEXT: s_cmp_lg_u32 s0, 61 1597; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1598; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s16 1599; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1600; GCN-NEXT: s_cmp_lg_u32 s0, 60 1601; GCN-NEXT: v_lshrrev_b16_e64 v17, 4, s16 1602; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1603; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1604; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1605; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1606; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1607; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1608; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1609; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1610; GCN-NEXT: s_cmp_lg_u32 s0, 59 1611; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1612; GCN-NEXT: v_lshrrev_b16_e64 v16, 3, s16 1613; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1614; GCN-NEXT: s_cmp_lg_u32 s0, 58 1615; GCN-NEXT: v_lshrrev_b16_e64 v17, 2, s16 1616; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1617; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1618; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1619; GCN-NEXT: s_cmp_lg_u32 s0, 56 1620; GCN-NEXT: v_mov_b32_e32 v13, s16 1621; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1622; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1623; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1624; GCN-NEXT: s_cmp_lg_u32 s0, 57 1625; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1626; GCN-NEXT: v_lshrrev_b16_e64 v17, 1, s16 1627; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1628; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1629; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1630; GCN-NEXT: v_and_b32_e32 v13, 1, v13 1631; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 1632; GCN-NEXT: v_or_b32_e32 v13, v13, v17 1633; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1634; GCN-NEXT: v_and_b32_e32 v13, 3, v13 1635; GCN-NEXT: v_or_b32_e32 v13, v13, v16 1636; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15 1637; GCN-NEXT: v_and_b32_e32 v13, 15, v13 1638; GCN-NEXT: v_or_b32_sdwa v13, v13, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1639; GCN-NEXT: s_cmp_lg_u32 s0, 47 1640; GCN-NEXT: v_or_b32_sdwa v14, 
v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1641; GCN-NEXT: v_lshrrev_b16_e64 v13, 15, s5 1642; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1643; GCN-NEXT: s_cmp_lg_u32 s0, 46 1644; GCN-NEXT: v_lshrrev_b16_e64 v15, 14, s5 1645; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1646; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1647; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1648; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1649; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1650; GCN-NEXT: s_cmp_lg_u32 s0, 45 1651; GCN-NEXT: v_or_b32_e32 v13, v15, v13 1652; GCN-NEXT: v_lshrrev_b16_e64 v15, 13, s5 1653; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1654; GCN-NEXT: s_cmp_lg_u32 s0, 44 1655; GCN-NEXT: v_lshrrev_b16_e64 v16, 12, s5 1656; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1657; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1658; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1659; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1660; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1661; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1662; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13 1663; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1664; GCN-NEXT: s_cmp_lg_u32 s0, 43 1665; GCN-NEXT: v_or_b32_e32 v13, v15, v13 1666; GCN-NEXT: v_lshrrev_b16_e64 v15, 11, s5 1667; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1668; GCN-NEXT: s_cmp_lg_u32 s0, 42 1669; GCN-NEXT: v_lshrrev_b16_e64 v16, 10, s5 1670; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1671; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1672; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1673; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1674; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1675; GCN-NEXT: s_cmp_lg_u32 s0, 41 1676; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1677; GCN-NEXT: v_lshrrev_b16_e64 v16, 9, s5 1678; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1679; GCN-NEXT: s_cmp_lg_u32 s0, 40 1680; GCN-NEXT: v_lshrrev_b16_e64 v17, 8, s5 1681; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1682; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1683; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1684; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, 
v16 1685; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1686; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1687; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1688; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1689; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1690; GCN-NEXT: v_lshlrev_b16_e32 v13, 4, v13 1691; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1692; GCN-NEXT: s_cmp_lg_u32 s0, 39 1693; GCN-NEXT: v_or_b32_sdwa v15, v15, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1694; GCN-NEXT: v_lshrrev_b16_e64 v13, 7, s5 1695; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1696; GCN-NEXT: s_cmp_lg_u32 s0, 38 1697; GCN-NEXT: v_lshrrev_b16_e64 v16, 6, s5 1698; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1699; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1700; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1701; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1702; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1703; GCN-NEXT: s_cmp_lg_u32 s0, 37 1704; GCN-NEXT: v_or_b32_e32 v13, v16, v13 1705; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s5 1706; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1707; GCN-NEXT: s_cmp_lg_u32 s0, 36 1708; GCN-NEXT: v_lshrrev_b16_e64 v17, 4, s5 1709; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1710; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1711; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1712; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1713; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1714; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1715; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13 1716; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1717; GCN-NEXT: s_cmp_lg_u32 s0, 35 1718; GCN-NEXT: v_or_b32_e32 v16, v16, v13 1719; GCN-NEXT: v_lshrrev_b16_e64 v13, 3, s5 1720; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1721; GCN-NEXT: s_cmp_lg_u32 s0, 34 1722; GCN-NEXT: v_lshrrev_b16_e64 v17, 2, s5 1723; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1724; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1725; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1726; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1727; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1728; GCN-NEXT: s_cmp_lg_u32 s0, 33 1729; GCN-NEXT: 
v_or_b32_e32 v17, v17, v13 1730; GCN-NEXT: v_lshrrev_b16_e64 v13, 1, s5 1731; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1732; GCN-NEXT: s_cmp_lg_u32 s0, 32 1733; GCN-NEXT: v_mov_b32_e32 v1, s5 1734; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1735; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1736; GCN-NEXT: v_cndmask_b32_e32 v1, 1, v1, vcc 1737; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 1738; GCN-NEXT: v_and_b32_e32 v1, 1, v1 1739; GCN-NEXT: v_or_b32_e32 v1, v1, v13 1740; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 1741; GCN-NEXT: v_and_b32_e32 v1, 3, v1 1742; GCN-NEXT: v_or_b32_e32 v1, v1, v17 1743; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 1744; GCN-NEXT: v_and_b32_e32 v1, 15, v1 1745; GCN-NEXT: v_or_b32_e32 v1, v1, v16 1746; GCN-NEXT: v_or_b32_sdwa v1, v1, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1747; GCN-NEXT: s_cmp_lg_u32 s0, 23 1748; GCN-NEXT: v_or_b32_sdwa v1, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1749; GCN-NEXT: v_mov_b32_e32 v14, s15 1750; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1751; GCN-NEXT: s_cmp_lg_u32 s0, 22 1752; GCN-NEXT: v_cndmask_b32_e32 v14, 1, v14, vcc 1753; GCN-NEXT: v_mov_b32_e32 v15, s14 1754; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1755; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1756; GCN-NEXT: v_lshlrev_b16_e32 v14, 1, v14 1757; GCN-NEXT: v_and_b32_e32 v15, 1, v15 1758; GCN-NEXT: s_cmp_lg_u32 s0, 21 1759; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1760; GCN-NEXT: v_mov_b32_e32 v15, s13 1761; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1762; GCN-NEXT: s_cmp_lg_u32 s0, 20 1763; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1764; GCN-NEXT: v_mov_b32_e32 v16, s12 1765; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1766; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1767; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1768; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1769; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1770; GCN-NEXT: v_lshlrev_b16_e32 v14, 2, v14 1771; GCN-NEXT: v_and_b32_e32 v15, 3, v15 1772; GCN-NEXT: s_cmp_lg_u32 s0, 19 1773; GCN-NEXT: 
v_or_b32_e32 v14, v15, v14 1774; GCN-NEXT: v_mov_b32_e32 v15, s11 1775; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1776; GCN-NEXT: s_cmp_lg_u32 s0, 18 1777; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1778; GCN-NEXT: v_mov_b32_e32 v16, s10 1779; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1780; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1781; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1782; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1783; GCN-NEXT: s_cmp_lg_u32 s0, 17 1784; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1785; GCN-NEXT: v_mov_b32_e32 v16, s9 1786; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1787; GCN-NEXT: s_cmp_lg_u32 s0, 16 1788; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1789; GCN-NEXT: v_mov_b32_e32 v18, s8 1790; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1791; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1792; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1793; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1794; GCN-NEXT: v_or_b32_e32 v16, v18, v16 1795; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1796; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1797; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1798; GCN-NEXT: v_lshlrev_b16_e32 v14, 4, v14 1799; GCN-NEXT: v_and_b32_e32 v15, 15, v15 1800; GCN-NEXT: s_cmp_lg_u32 s0, 31 1801; GCN-NEXT: v_or_b32_e32 v14, v15, v14 1802; GCN-NEXT: v_lshrrev_b16_e64 v15, 7, s1 1803; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1804; GCN-NEXT: s_cmp_lg_u32 s0, 30 1805; GCN-NEXT: v_lshrrev_b16_e64 v16, 6, s1 1806; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1807; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1808; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1809; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1810; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1811; GCN-NEXT: s_cmp_lg_u32 s0, 29 1812; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1813; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s1 1814; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1815; GCN-NEXT: s_cmp_lg_u32 s0, 28 1816; GCN-NEXT: v_lshrrev_b16_e64 v18, 4, s1 1817; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1818; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1819; GCN-NEXT: v_cndmask_b32_e32 v18, 1, 
v18, vcc 1820; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1821; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1822; GCN-NEXT: v_or_b32_e32 v16, v18, v16 1823; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1824; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1825; GCN-NEXT: s_cmp_lg_u32 s0, 27 1826; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1827; GCN-NEXT: v_lshrrev_b16_e64 v16, 3, s1 1828; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1829; GCN-NEXT: s_cmp_lg_u32 s0, 26 1830; GCN-NEXT: v_lshrrev_b16_e64 v18, 2, s1 1831; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1832; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1833; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1834; GCN-NEXT: s_cmp_lg_u32 s0, 24 1835; GCN-NEXT: v_mov_b32_e32 v17, s1 1836; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1837; GCN-NEXT: v_and_b32_e32 v18, 1, v18 1838; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1839; GCN-NEXT: s_cmp_lg_u32 s0, 25 1840; GCN-NEXT: v_or_b32_e32 v16, v18, v16 1841; GCN-NEXT: v_lshrrev_b16_e64 v18, 1, s1 1842; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1843; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1844; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc 1845; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1846; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 1847; GCN-NEXT: v_or_b32_e32 v17, v17, v18 1848; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 1849; GCN-NEXT: v_and_b32_e32 v17, 3, v17 1850; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1851; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15 1852; GCN-NEXT: v_and_b32_e32 v16, 15, v16 1853; GCN-NEXT: v_or_b32_sdwa v15, v16, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1854; GCN-NEXT: s_cmp_lg_u32 s0, 15 1855; GCN-NEXT: v_or_b32_sdwa v14, v14, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1856; GCN-NEXT: v_lshrrev_b16_e64 v15, 15, s4 1857; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1858; GCN-NEXT: s_cmp_lg_u32 s0, 14 1859; GCN-NEXT: v_lshrrev_b16_e64 v16, 14, s4 1860; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc 1861; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1862; GCN-NEXT: v_cndmask_b32_e32 v16, 1, 
v16, vcc 1863; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 1864; GCN-NEXT: v_and_b32_e32 v16, 1, v16 1865; GCN-NEXT: s_cmp_lg_u32 s0, 13 1866; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1867; GCN-NEXT: v_lshrrev_b16_e64 v16, 13, s4 1868; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1869; GCN-NEXT: s_cmp_lg_u32 s0, 12 1870; GCN-NEXT: v_lshrrev_b16_e64 v17, 12, s4 1871; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc 1872; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1873; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc 1874; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1875; GCN-NEXT: v_and_b32_e32 v17, 1, v17 1876; GCN-NEXT: v_or_b32_e32 v16, v17, v16 1877; GCN-NEXT: s_cmp_lg_u32 s0, 11 1878; GCN-NEXT: v_lshrrev_b16_e64 v18, 11, s4 1879; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 1880; GCN-NEXT: v_and_b32_e32 v16, 3, v16 1881; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1882; GCN-NEXT: s_cmp_lg_u32 s0, 10 1883; GCN-NEXT: v_lshrrev_b16_e64 v13, 10, s4 1884; GCN-NEXT: v_or_b32_e32 v15, v16, v15 1885; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v18, vcc 1886; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1887; GCN-NEXT: s_cmp_lg_u32 s0, 9 1888; GCN-NEXT: v_lshrrev_b16_e64 v12, 9, s4 1889; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc 1890; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1891; GCN-NEXT: s_cmp_lg_u32 s0, 8 1892; GCN-NEXT: v_lshrrev_b16_e64 v11, 8, s4 1893; GCN-NEXT: v_cndmask_b32_e32 v12, 1, v12, vcc 1894; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1895; GCN-NEXT: s_cmp_lg_u32 s0, 7 1896; GCN-NEXT: v_lshrrev_b16_e64 v10, 7, s4 1897; GCN-NEXT: v_cndmask_b32_e32 v11, 1, v11, vcc 1898; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1899; GCN-NEXT: s_cmp_lg_u32 s0, 6 1900; GCN-NEXT: v_lshrrev_b16_e64 v9, 6, s4 1901; GCN-NEXT: v_cndmask_b32_e32 v10, 1, v10, vcc 1902; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1903; GCN-NEXT: s_cmp_lg_u32 s0, 5 1904; GCN-NEXT: v_lshrrev_b16_e64 v8, 5, s4 1905; GCN-NEXT: v_cndmask_b32_e32 v9, 1, v9, vcc 1906; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1907; GCN-NEXT: s_cmp_lg_u32 s0, 4 1908; GCN-NEXT: v_lshrrev_b16_e64 v7, 4, s4 1909; GCN-NEXT: 
v_cndmask_b32_e32 v8, 1, v8, vcc 1910; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1911; GCN-NEXT: s_cmp_lg_u32 s0, 3 1912; GCN-NEXT: v_lshrrev_b16_e64 v6, 3, s4 1913; GCN-NEXT: v_cndmask_b32_e32 v7, 1, v7, vcc 1914; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1915; GCN-NEXT: s_cmp_lg_u32 s0, 2 1916; GCN-NEXT: v_lshrrev_b16_e64 v5, 2, s4 1917; GCN-NEXT: v_cndmask_b32_e32 v6, 1, v6, vcc 1918; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1919; GCN-NEXT: s_cmp_lg_u32 s0, 1 1920; GCN-NEXT: v_lshrrev_b16_e64 v4, 1, s4 1921; GCN-NEXT: v_cndmask_b32_e32 v5, 1, v5, vcc 1922; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1923; GCN-NEXT: s_cmp_lg_u32 s0, 0 1924; GCN-NEXT: v_mov_b32_e32 v0, s4 1925; GCN-NEXT: v_cndmask_b32_e32 v4, 1, v4, vcc 1926; GCN-NEXT: s_cselect_b64 vcc, -1, 0 1927; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc 1928; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 1929; GCN-NEXT: v_and_b32_e32 v13, 1, v13 1930; GCN-NEXT: v_lshlrev_b16_e32 v12, 1, v12 1931; GCN-NEXT: v_and_b32_e32 v11, 1, v11 1932; GCN-NEXT: v_lshlrev_b16_e32 v10, 1, v10 1933; GCN-NEXT: v_and_b32_e32 v9, 1, v9 1934; GCN-NEXT: v_lshlrev_b16_e32 v8, 1, v8 1935; GCN-NEXT: v_and_b32_e32 v7, 1, v7 1936; GCN-NEXT: v_lshlrev_b16_e32 v6, 1, v6 1937; GCN-NEXT: v_and_b32_e32 v5, 1, v5 1938; GCN-NEXT: v_lshlrev_b16_e32 v4, 1, v4 1939; GCN-NEXT: v_and_b32_e32 v0, 1, v0 1940; GCN-NEXT: v_or_b32_e32 v13, v13, v16 1941; GCN-NEXT: v_or_b32_e32 v11, v11, v12 1942; GCN-NEXT: v_or_b32_e32 v9, v9, v10 1943; GCN-NEXT: v_or_b32_e32 v7, v7, v8 1944; GCN-NEXT: v_or_b32_e32 v5, v5, v6 1945; GCN-NEXT: v_or_b32_e32 v0, v0, v4 1946; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13 1947; GCN-NEXT: v_and_b32_e32 v11, 3, v11 1948; GCN-NEXT: v_lshlrev_b16_e32 v9, 2, v9 1949; GCN-NEXT: v_and_b32_e32 v7, 3, v7 1950; GCN-NEXT: v_lshlrev_b16_e32 v5, 2, v5 1951; GCN-NEXT: v_and_b32_e32 v0, 3, v0 1952; GCN-NEXT: v_or_b32_e32 v11, v11, v13 1953; GCN-NEXT: v_or_b32_e32 v7, v7, v9 1954; GCN-NEXT: v_or_b32_e32 v0, v0, v5 1955; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15 1956; GCN-NEXT: 
v_and_b32_e32 v11, 15, v11
; GCN-NEXT: v_lshlrev_b16_e32 v7, 4, v7
; GCN-NEXT: v_and_b32_e32 v0, 15, v0
; GCN-NEXT: v_or_b32_sdwa v11, v11, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GCN-NEXT: v_or_b32_e32 v0, v0, v7
; GCN-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT: v_mov_b32_e32 v5, s3
; GCN-NEXT: v_or_b32_sdwa v0, v0, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN-NEXT: v_mov_b32_e32 v4, s2
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
entry:
  %v = insertelement <128 x i1> %vec, i1 1, i32 %sel
  store <128 x i1> %v, <128 x i1> addrspace(1)* %out
  ret void
}

; Shader (amdgpu_ps) case: insert the constant 1.0 into a <32 x float>
; argument at an index that is only known at run time, and return the
; resulting vector.
;
; The expected code below compares v32 (presumably the register carrying
; %sel -- confirm against the calling convention) with each lane number
; 0..31 using v_cmp_ne_u32, then emits one v_cndmask_b32 per lane whose two
; candidates are the constant 1.0 and the incoming lane register. No
; indirect register indexing and no memory access appear in the expected
; output.
;
; The check lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define amdgpu_ps <32 x float> @float32_inselt_vec(<32 x float> %vec, i32 %sel) {
; GCN-LABEL: float32_inselt_vec:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 2, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], 3, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 4, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 5, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[8:9], 6, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[10:11], 7, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[12:13], 8, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[14:15], 9, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[16:17], 10, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[18:19], 11, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[20:21], 12, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[22:23], 13, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[24:25], 14, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[26:27], 15, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[28:29], 16, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[30:31], 17, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[34:35], 18, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[36:37], 19, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[38:39], 20, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[40:41], 21, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[42:43], 22, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[44:45], 23, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[46:47], 24, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[48:49], 25, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[50:51], 26, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[52:53], 27, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[54:55], 28, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[56:57], 29, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[58:59], 30, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[60:61], 31, v32
; GCN-NEXT: v_cmp_ne_u32_e64 s[62:63], 0, v32
; GCN-NEXT: v_cndmask_b32_e64 v0, 1.0, v0, s[62:63]
; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
; GCN-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1]
; GCN-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[2:3]
; GCN-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[4:5]
; GCN-NEXT: v_cndmask_b32_e64 v5, 1.0, v5, s[6:7]
; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, v6, s[8:9]
; GCN-NEXT: v_cndmask_b32_e64 v7, 1.0, v7, s[10:11]
; GCN-NEXT: v_cndmask_b32_e64 v8, 1.0, v8, s[12:13]
; GCN-NEXT: v_cndmask_b32_e64 v9, 1.0, v9, s[14:15]
; GCN-NEXT: v_cndmask_b32_e64 v10, 1.0, v10, s[16:17]
; GCN-NEXT: v_cndmask_b32_e64 v11, 1.0, v11, s[18:19]
; GCN-NEXT: v_cndmask_b32_e64 v12, 1.0, v12, s[20:21]
; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, v13, s[22:23]
; GCN-NEXT: v_cndmask_b32_e64 v14, 1.0, v14, s[24:25]
; GCN-NEXT: v_cndmask_b32_e64 v15, 1.0, v15, s[26:27]
; GCN-NEXT: v_cndmask_b32_e64 v16, 1.0, v16, s[28:29]
; GCN-NEXT: v_cndmask_b32_e64 v17, 1.0, v17, s[30:31]
; GCN-NEXT: v_cndmask_b32_e64 v18, 1.0, v18, s[34:35]
; GCN-NEXT: v_cndmask_b32_e64 v19, 1.0, v19, s[36:37]
; GCN-NEXT: v_cndmask_b32_e64 v20, 1.0, v20, s[38:39]
; GCN-NEXT: v_cndmask_b32_e64 v21, 1.0, v21, s[40:41]
; GCN-NEXT: v_cndmask_b32_e64 v22, 1.0, v22, s[42:43]
; GCN-NEXT: v_cndmask_b32_e64 v23, 1.0, v23, s[44:45]
; GCN-NEXT: v_cndmask_b32_e64 v24, 1.0, v24, s[46:47]
; GCN-NEXT: v_cndmask_b32_e64 v25, 1.0, v25, s[48:49]
; GCN-NEXT: v_cndmask_b32_e64 v26, 1.0, v26, s[50:51]
; GCN-NEXT: v_cndmask_b32_e64 v27, 1.0, v27, s[52:53]
; GCN-NEXT: v_cndmask_b32_e64 v28, 1.0, v28, s[54:55]
; GCN-NEXT: v_cndmask_b32_e64 v29, 1.0, v29, s[56:57]
; GCN-NEXT: v_cndmask_b32_e64 v30, 1.0, v30, s[58:59]
; GCN-NEXT: v_cndmask_b32_e64 v31, 1.0, v31, s[60:61]
; GCN-NEXT: ; return to shader part epilog
entry:
  %v = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel
  ret <32 x float> %v
}

; Plain-function case (no amdgpu_kernel / amdgpu_ps marker on the define):
; insert the constant double 1.0 into a <8 x double> argument at a
; run-time index and return the resulting vector. The expected code ends
; with s_setpc_b64 s[30:31].
;
; Each double occupies a pair of 32-bit VGPRs in the expected output. For
; every element there is one v_cmp_eq_u32 of v16 (presumably %sel --
; confirm against the calling convention) against the lane number, followed
; by two v_cndmask_b32: the even register of the pair is offered the
; constant 0 and the odd register is offered v17, which holds 0x3ff00000.
; Together 0x3ff00000:0x00000000 is the IEEE-754 bit pattern of double 1.0.
;
; The check lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) {
; GCN-LABEL: double8_inselt_vec:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
; GCN-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
; GCN-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
; GCN-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
; GCN-NEXT: v_cndmask_b32_e64 v12, v12, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
; GCN-NEXT: v_cndmask_b32_e64 v14, v14, 0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel
  ret <8 x double> %v
}