; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s

; Each function below loads a <4 x i8> and extracts one byte from it. The
; expected code repacks the four bytes into one 32-bit value and shifts the
; requested byte down to bit 0. The gfx900, fiji and hawaii runs share the
; GCN prefix in addition to their own; gfx1010 and gfx1100 have only their
; own prefix. Do not edit the assertion lines by hand; regenerate them with
; the script named on the first line.

; Vector loaded through an inreg addrspace(4) pointer, dynamic inreg index.
; The expected code is entirely scalar: s_load of the dword, byte repacking,
; index masked with 3 and scaled by 8 (shift left by 3), then s_lshr_b32.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s1, s0, 24
; GCN-NEXT:    s_and_b32 s2, s0, 0xff
; GCN-NEXT:    s_lshl_b32 s3, s3, 8
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s2, s0
; GCN-NEXT:    s_lshl_b32 s1, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    s_and_b32 s1, s4, 3
; GCN-NEXT:    s_lshl_b32 s1, s1, 3
; GCN-NEXT:    s_lshr_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GFX10-NEXT:    s_lshr_b32 s1, s0, 24
; GFX10-NEXT:    s_and_b32 s2, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s3, s3, 8
; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
; GFX10-NEXT:    s_or_b32 s2, s2, s3
; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
; GFX10-NEXT:    s_or_b32 s0, s2, s0
; GFX10-NEXT:    s_and_b32 s2, s4, 3
; GFX10-NEXT:    s_or_b32 s0, s0, s1
; GFX10-NEXT:    s_lshl_b32 s1, s2, 3
; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GFX11-NEXT:    s_lshr_b32 s1, s0, 24
; GFX11-NEXT:    s_and_b32 s2, s0, 0xff
; GFX11-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX11-NEXT:    s_lshl_b32 s3, s3, 8
; GFX11-NEXT:    s_lshl_b32 s0, s0, 16
; GFX11-NEXT:    s_or_b32 s2, s2, s3
; GFX11-NEXT:    s_lshl_b32 s1, s1, 24
; GFX11-NEXT:    s_or_b32 s0, s2, s0
; GFX11-NEXT:    s_and_b32 s2, s4, 3
; GFX11-NEXT:    s_or_b32 s0, s0, s1
; GFX11-NEXT:    s_lshl_b32 s1, s2, 3
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_lshr_b32 s0, s0, s1
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; Vector loaded through a non-inreg addrspace(1) pointer, dynamic inreg index.
; The expected code does the load and byte repacking with vector instructions,
; computes the shift amount (index & 3) << 3 with scalar instructions, and
; moves the shifted result to s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v3, 16
; GFX9-NEXT:    s_and_b32 s0, s2, 3
; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    s_and_b32 s0, s2, 3
; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_and_b32 s0, s2, 3
; GFX7-NEXT:    s_lshl_b32 s0, s0, 3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-NEXT:    v_mov_b32_e32 v2, 16
; GFX10-NEXT:    s_and_b32 s0, s2, 3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_and_b32 s0, s2, 3
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-NEXT:    s_lshl_b32 s0, s0, 3
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; Default calling convention: addrspace(1) pointer and dynamic index are both
; ordinary (non-inreg) arguments. The expected code is all vector ALU after
; the load: repack the bytes, compute (index & 3) << 3 in a VGPR, shift right
; by it, and return with s_setpc_b64 s[30:31].
define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v3, 8
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v4, 16
; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v3
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v5
; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v3, 16
; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    v_and_b32_e32 v1, 3, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v4, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v3, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-NEXT:    v_mov_b32_e32 v3, 16
; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
; GFX11-NEXT:    v_bfe_u32 v3, v0, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v1, 3, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_or3_b32 v0, v0, v3, v4
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; Vector loaded through an inreg addrspace(4) pointer, dynamic non-inreg
; index. The expected code repacks the bytes with scalar instructions,
; computes (index & 3) << 3 in v0, performs the final shift as a vector
; instruction with the scalar value as an operand, and reads the result back
; into s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX9-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GFX9-NEXT:    s_lshr_b32 s1, s0, 24
; GFX9-NEXT:    s_and_b32 s2, s0, 0xff
; GFX9-NEXT:    s_lshl_b32 s3, s3, 8
; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX9-NEXT:    s_or_b32 s2, s2, s3
; GFX9-NEXT:    s_lshl_b32 s0, s0, 16
; GFX9-NEXT:    s_or_b32 s0, s2, s0
; GFX9-NEXT:    s_lshl_b32 s1, s1, 24
; GFX9-NEXT:    s_or_b32 s0, s0, s1
; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX8-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GFX8-NEXT:    s_lshr_b32 s1, s0, 24
; GFX8-NEXT:    s_and_b32 s2, s0, 0xff
; GFX8-NEXT:    s_lshl_b32 s3, s3, 8
; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX8-NEXT:    s_or_b32 s2, s2, s3
; GFX8-NEXT:    s_lshl_b32 s0, s0, 16
; GFX8-NEXT:    s_or_b32 s0, s2, s0
; GFX8-NEXT:    s_lshl_b32 s1, s1, 24
; GFX8-NEXT:    s_or_b32 s0, s0, s1
; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX7-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GFX7-NEXT:    s_lshr_b32 s1, s0, 24
; GFX7-NEXT:    s_and_b32 s2, s0, 0xff
; GFX7-NEXT:    s_lshl_b32 s3, s3, 8
; GFX7-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX7-NEXT:    s_or_b32 s2, s2, s3
; GFX7-NEXT:    s_lshl_b32 s0, s0, 16
; GFX7-NEXT:    s_or_b32 s0, s2, s0
; GFX7-NEXT:    s_lshl_b32 s1, s1, 24
; GFX7-NEXT:    s_or_b32 s0, s0, s1
; GFX7-NEXT:    v_lshr_b32_e32 v0, s0, v0
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
; GFX11-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
; GFX11-NEXT:    s_or_b32 s1, s1, s2
; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
; GFX11-NEXT:    s_or_b32 s1, s1, s3
; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_or_b32 s0, s1, s0
; GFX11-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; Inreg addrspace(4) pointer, constant index 0. The expected code still
; repacks all four bytes into s0; no final shift is emitted.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx0:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s1, s0, 24
; GCN-NEXT:    s_and_b32 s2, s0, 0xff
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_lshl_b32 s3, s3, 8
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s2, s0
; GCN-NEXT:    s_lshl_b32 s1, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
; GFX11-NEXT:    s_or_b32 s1, s1, s2
; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
; GFX11-NEXT:    s_or_b32 s1, s1, s3
; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_or_b32 s0, s1, s0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 0
  ret i8 %element
}

; Inreg addrspace(4) pointer, constant index 1. Same scalar repacking as the
; index-0 case, followed by s_lshr_b32 by 8.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx1:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s1, s0, 24
; GCN-NEXT:    s_and_b32 s2, s0, 0xff
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_lshl_b32 s3, s3, 8
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s2, s0
; GCN-NEXT:    s_lshl_b32 s1, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    s_lshr_b32 s0, s0, 8
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    s_lshr_b32 s0, s0, 8
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
; GFX11-NEXT:    s_or_b32 s1, s1, s2
; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
; GFX11-NEXT:    s_or_b32 s1, s1, s3
; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_or_b32 s0, s1, s0
; GFX11-NEXT:    s_lshr_b32 s0, s0, 8
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 1
  ret i8 %element
}

; Inreg addrspace(4) pointer, constant index 2. Same scalar repacking as the
; index-0 case, followed by s_lshr_b32 by 16.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx2:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s1, s0, 24
; GCN-NEXT:    s_and_b32 s2, s0, 0xff
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_lshl_b32 s3, s3, 8
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s2, s0
; GCN-NEXT:    s_lshl_b32 s1, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    s_lshr_b32 s0, s0, 16
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    s_lshr_b32 s0, s0, 16
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
; GFX11-NEXT:    s_or_b32 s1, s1, s2
; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
; GFX11-NEXT:    s_or_b32 s1, s1, s3
; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_or_b32 s0, s1, s0
; GFX11-NEXT:    s_lshr_b32 s0, s0, 16
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 2
  ret i8 %element
}

; Inreg addrspace(4) pointer, constant index 3. Same scalar repacking as the
; index-0 case, followed by s_lshr_b32 by 24.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx3:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s1, s0, 24
; GCN-NEXT:    s_and_b32 s2, s0, 0xff
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_lshl_b32 s3, s3, 8
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s2, s0
; GCN-NEXT:    s_lshl_b32 s1, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    s_lshr_b32 s0, s0, 24
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
; GFX11-NEXT:    s_or_b32 s1, s1, s2
; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
; GFX11-NEXT:    s_or_b32 s1, s1, s3
; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_or_b32 s0, s1, s0
; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 3
  ret i8 %element
}

; Default calling convention, addrspace(1) pointer, constant index 0. The
; expected code repacks the loaded bytes into v0 with vector instructions and
; returns with s_setpc_b64; no final shift is emitted.
define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v3, 16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-NEXT:    v_mov_b32_e32 v2, 16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 0
  ret i8 %element
}

; Default calling convention, addrspace(1) pointer, constant index 1. Same
; vector repacking as the index-0 case, followed by v_lshrrev_b32 by 8.
define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v2, 16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v4
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    s_mov_b32 s4, 8
; GFX10-NEXT:    v_mov_b32_e32 v1, 16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 1
  ret i8 %element
}

define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    s_mov_b32 s4, 16
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-NEXT:    s_mov_b32 s4, 16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
;
GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 876; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 877; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 878; GFX10-NEXT: s_setpc_b64 s[30:31] 879; 880; GFX11-LABEL: extractelement_vgpr_v4i8_idx2: 881; GFX11: ; %bb.0: 882; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 883; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 884; GFX11-NEXT: global_load_b32 v0, v[0:1], off 885; GFX11-NEXT: s_waitcnt vmcnt(0) 886; GFX11-NEXT: v_bfe_u32 v1, v0, 8, 8 887; GFX11-NEXT: v_bfe_u32 v2, v0, 16, 8 888; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 889; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 890; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1 891; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 892; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 893; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1 894; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 895; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 896; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 897; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 898; GFX11-NEXT: s_setpc_b64 s[30:31] 899 %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr 900 %element = extractelement <4 x i8> %vector, i32 2 901 ret i8 %element 902} 903 904define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) { 905; GFX9-LABEL: extractelement_vgpr_v4i8_idx3: 906; GFX9: ; %bb.0: 907; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 908; GFX9-NEXT: global_load_dword v0, v[0:1], off 909; GFX9-NEXT: v_mov_b32_e32 v2, 8 910; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 911; GFX9-NEXT: v_mov_b32_e32 v3, 16 912; GFX9-NEXT: s_waitcnt vmcnt(0) 913; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 914; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 915; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 916; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 917; GFX9-NEXT: 
v_lshlrev_b32_e32 v1, 24, v4 918; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 919; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 920; GFX9-NEXT: s_setpc_b64 s[30:31] 921; 922; GFX8-LABEL: extractelement_vgpr_v4i8_idx3: 923; GFX8: ; %bb.0: 924; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 925; GFX8-NEXT: flat_load_dword v0, v[0:1] 926; GFX8-NEXT: v_mov_b32_e32 v1, 8 927; GFX8-NEXT: v_mov_b32_e32 v2, 16 928; GFX8-NEXT: s_waitcnt vmcnt(0) 929; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 930; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 931; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 932; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 933; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 934; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 935; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 936; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 937; GFX8-NEXT: s_setpc_b64 s[30:31] 938; 939; GFX7-LABEL: extractelement_vgpr_v4i8_idx3: 940; GFX7: ; %bb.0: 941; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 942; GFX7-NEXT: s_mov_b32 s6, 0 943; GFX7-NEXT: s_mov_b32 s7, 0xf000 944; GFX7-NEXT: s_mov_b64 s[4:5], 0 945; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 946; GFX7-NEXT: s_waitcnt vmcnt(0) 947; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 948; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 949; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 950; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 951; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 952; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 953; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 954; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 955; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 956; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 957; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 958; GFX7-NEXT: s_setpc_b64 s[30:31] 959; 960; GFX10-LABEL: extractelement_vgpr_v4i8_idx3: 961; GFX10: ; %bb.0: 962; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 963; 
GFX10-NEXT: s_waitcnt_vscnt null, 0x0 964; GFX10-NEXT: global_load_dword v0, v[0:1], off 965; GFX10-NEXT: v_mov_b32_e32 v1, 8 966; GFX10-NEXT: v_mov_b32_e32 v2, 16 967; GFX10-NEXT: s_waitcnt vmcnt(0) 968; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 969; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 970; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 971; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 972; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 973; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 974; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 975; GFX10-NEXT: s_setpc_b64 s[30:31] 976; 977; GFX11-LABEL: extractelement_vgpr_v4i8_idx3: 978; GFX11: ; %bb.0: 979; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 980; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 981; GFX11-NEXT: global_load_b32 v0, v[0:1], off 982; GFX11-NEXT: s_waitcnt vmcnt(0) 983; GFX11-NEXT: v_bfe_u32 v1, v0, 8, 8 984; GFX11-NEXT: v_bfe_u32 v2, v0, 16, 8 985; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 986; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 987; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1 988; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 989; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 990; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1 991; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 992; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 993; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 994; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 995; GFX11-NEXT: s_setpc_b64 s[30:31] 996 %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr 997 %element = extractelement <4 x i8> %vector, i32 3 998 ret i8 %element 999} 1000 1001define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) { 1002; GCN-LABEL: extractelement_sgpr_v8i8_sgpr_idx: 1003; GCN: ; %bb.0: 1004; GCN-NEXT: 
s_load_dwordx2 s[0:1], s[2:3], 0x0 1005; GCN-NEXT: s_waitcnt lgkmcnt(0) 1006; GCN-NEXT: s_bfe_u32 s6, s0, 0x80008 1007; GCN-NEXT: s_lshr_b32 s2, s0, 24 1008; GCN-NEXT: s_and_b32 s5, s0, 0xff 1009; GCN-NEXT: s_lshl_b32 s6, s6, 8 1010; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 1011; GCN-NEXT: s_or_b32 s5, s5, s6 1012; GCN-NEXT: s_lshl_b32 s0, s0, 16 1013; GCN-NEXT: s_or_b32 s0, s5, s0 1014; GCN-NEXT: s_lshl_b32 s2, s2, 24 1015; GCN-NEXT: s_bfe_u32 s5, s1, 0x80008 1016; GCN-NEXT: s_lshr_b32 s3, s1, 24 1017; GCN-NEXT: s_or_b32 s0, s0, s2 1018; GCN-NEXT: s_and_b32 s2, s1, 0xff 1019; GCN-NEXT: s_lshl_b32 s5, s5, 8 1020; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 1021; GCN-NEXT: s_or_b32 s2, s2, s5 1022; GCN-NEXT: s_lshl_b32 s1, s1, 16 1023; GCN-NEXT: s_or_b32 s1, s2, s1 1024; GCN-NEXT: s_lshl_b32 s2, s3, 24 1025; GCN-NEXT: s_or_b32 s1, s1, s2 1026; GCN-NEXT: s_lshr_b32 s2, s4, 2 1027; GCN-NEXT: s_cmp_eq_u32 s2, 1 1028; GCN-NEXT: s_cselect_b32 s0, s1, s0 1029; GCN-NEXT: s_and_b32 s1, s4, 3 1030; GCN-NEXT: s_lshl_b32 s1, s1, 3 1031; GCN-NEXT: s_lshr_b32 s0, s0, s1 1032; GCN-NEXT: ; return to shader part epilog 1033; 1034; GFX10-LABEL: extractelement_sgpr_v8i8_sgpr_idx: 1035; GFX10: ; %bb.0: 1036; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1037; GFX10-NEXT: s_lshr_b32 s2, s4, 2 1038; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1039; GFX10-NEXT: s_bfe_u32 s7, s0, 0x80008 1040; GFX10-NEXT: s_bfe_u32 s9, s1, 0x80008 1041; GFX10-NEXT: s_lshr_b32 s3, s0, 24 1042; GFX10-NEXT: s_lshr_b32 s5, s1, 24 1043; GFX10-NEXT: s_and_b32 s6, s0, 0xff 1044; GFX10-NEXT: s_bfe_u32 s0, s0, 0x80010 1045; GFX10-NEXT: s_and_b32 s8, s1, 0xff 1046; GFX10-NEXT: s_bfe_u32 s1, s1, 0x80010 1047; GFX10-NEXT: s_lshl_b32 s7, s7, 8 1048; GFX10-NEXT: s_lshl_b32 s9, s9, 8 1049; GFX10-NEXT: s_lshl_b32 s0, s0, 16 1050; GFX10-NEXT: s_lshl_b32 s1, s1, 16 1051; GFX10-NEXT: s_or_b32 s6, s6, s7 1052; GFX10-NEXT: s_or_b32 s7, s8, s9 1053; GFX10-NEXT: s_lshl_b32 s3, s3, 24 1054; GFX10-NEXT: s_lshl_b32 s5, s5, 24 1055; GFX10-NEXT: s_or_b32 
s0, s6, s0 1056; GFX10-NEXT: s_or_b32 s1, s7, s1 1057; GFX10-NEXT: s_or_b32 s0, s0, s3 1058; GFX10-NEXT: s_or_b32 s1, s1, s5 1059; GFX10-NEXT: s_cmp_eq_u32 s2, 1 1060; GFX10-NEXT: s_cselect_b32 s0, s1, s0 1061; GFX10-NEXT: s_and_b32 s1, s4, 3 1062; GFX10-NEXT: s_lshl_b32 s1, s1, 3 1063; GFX10-NEXT: s_lshr_b32 s0, s0, s1 1064; GFX10-NEXT: ; return to shader part epilog 1065; 1066; GFX11-LABEL: extractelement_sgpr_v8i8_sgpr_idx: 1067; GFX11: ; %bb.0: 1068; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1069; GFX11-NEXT: s_lshr_b32 s2, s4, 2 1070; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1071; GFX11-NEXT: s_bfe_u32 s7, s0, 0x80008 1072; GFX11-NEXT: s_bfe_u32 s9, s1, 0x80008 1073; GFX11-NEXT: s_lshr_b32 s3, s0, 24 1074; GFX11-NEXT: s_lshr_b32 s5, s1, 24 1075; GFX11-NEXT: s_and_b32 s6, s0, 0xff 1076; GFX11-NEXT: s_bfe_u32 s0, s0, 0x80010 1077; GFX11-NEXT: s_and_b32 s8, s1, 0xff 1078; GFX11-NEXT: s_bfe_u32 s1, s1, 0x80010 1079; GFX11-NEXT: s_lshl_b32 s7, s7, 8 1080; GFX11-NEXT: s_lshl_b32 s9, s9, 8 1081; GFX11-NEXT: s_lshl_b32 s0, s0, 16 1082; GFX11-NEXT: s_lshl_b32 s1, s1, 16 1083; GFX11-NEXT: s_or_b32 s6, s6, s7 1084; GFX11-NEXT: s_or_b32 s7, s8, s9 1085; GFX11-NEXT: s_lshl_b32 s3, s3, 24 1086; GFX11-NEXT: s_lshl_b32 s5, s5, 24 1087; GFX11-NEXT: s_or_b32 s0, s6, s0 1088; GFX11-NEXT: s_or_b32 s1, s7, s1 1089; GFX11-NEXT: s_or_b32 s0, s0, s3 1090; GFX11-NEXT: s_or_b32 s1, s1, s5 1091; GFX11-NEXT: s_cmp_eq_u32 s2, 1 1092; GFX11-NEXT: s_cselect_b32 s0, s1, s0 1093; GFX11-NEXT: s_and_b32 s1, s4, 3 1094; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1095; GFX11-NEXT: s_lshl_b32 s1, s1, 3 1096; GFX11-NEXT: s_lshr_b32 s0, s0, s1 1097; GFX11-NEXT: ; return to shader part epilog 1098 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1099 %element = extractelement <8 x i8> %vector, i32 %idx 1100 ret i8 %element 1101} 1102 1103define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 inreg %idx) { 1104; GFX9-LABEL: 
extractelement_vgpr_v8i8_sgpr_idx: 1105; GFX9: ; %bb.0: 1106; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 1107; GFX9-NEXT: s_mov_b32 s0, 8 1108; GFX9-NEXT: s_mov_b32 s1, 16 1109; GFX9-NEXT: s_movk_i32 s3, 0xff 1110; GFX9-NEXT: s_lshr_b32 s4, s2, 2 1111; GFX9-NEXT: s_and_b32 s2, s2, 3 1112; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 1113; GFX9-NEXT: s_waitcnt vmcnt(0) 1114; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 1115; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 1116; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1117; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1118; GFX9-NEXT: v_lshlrev_b32_sdwa v5, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1119; GFX9-NEXT: v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1120; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v4 1121; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 1122; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v6 1123; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1124; GFX9-NEXT: v_or3_b32 v0, v0, v5, v2 1125; GFX9-NEXT: v_or3_b32 v1, v1, v7, v3 1126; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1127; GFX9-NEXT: s_lshl_b32 s0, s2, 3 1128; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 1129; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1130; GFX9-NEXT: ; return to shader part epilog 1131; 1132; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx: 1133; GFX8: ; %bb.0: 1134; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 1135; GFX8-NEXT: v_mov_b32_e32 v2, 8 1136; GFX8-NEXT: v_mov_b32_e32 v3, 16 1137; GFX8-NEXT: s_lshr_b32 s0, s2, 2 1138; GFX8-NEXT: s_and_b32 s1, s2, 3 1139; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 1140; GFX8-NEXT: s_lshl_b32 s0, s1, 3 1141; GFX8-NEXT: s_waitcnt vmcnt(0) 1142; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1143; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1144; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 1145; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v1 1146; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1147; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1148; GFX8-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1149; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1150; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 1151; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v5 1152; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 1153; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 1154; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 1155; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 1156; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1157; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 1158; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1159; GFX8-NEXT: ; return to shader part epilog 1160; 1161; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx: 1162; GFX7: ; %bb.0: 1163; GFX7-NEXT: s_mov_b32 s6, 0 1164; GFX7-NEXT: s_mov_b32 s7, 0xf000 1165; GFX7-NEXT: s_mov_b64 s[4:5], 0 1166; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 1167; GFX7-NEXT: s_lshr_b32 s0, s2, 2 1168; GFX7-NEXT: s_and_b32 s1, s2, 3 1169; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 1170; GFX7-NEXT: s_lshl_b32 s0, s1, 3 1171; GFX7-NEXT: s_waitcnt vmcnt(0) 1172; GFX7-NEXT: v_bfe_u32 v5, v0, 8, 8 1173; GFX7-NEXT: v_bfe_u32 v7, v1, 8, 8 1174; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 1175; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 1176; GFX7-NEXT: v_and_b32_e32 v4, 0xff, v0 1177; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 1178; GFX7-NEXT: v_and_b32_e32 v6, 0xff, v1 1179; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 1180; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 1181; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 1182; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1183; GFX7-NEXT: 
v_lshlrev_b32_e32 v1, 16, v1 1184; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 1185; GFX7-NEXT: v_or_b32_e32 v5, v6, v7 1186; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 1187; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1188; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 1189; GFX7-NEXT: v_or_b32_e32 v1, v5, v1 1190; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 1191; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 1192; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1193; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 1194; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1195; GFX7-NEXT: ; return to shader part epilog 1196; 1197; GFX10-LABEL: extractelement_vgpr_v8i8_sgpr_idx: 1198; GFX10: ; %bb.0: 1199; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 1200; GFX10-NEXT: s_mov_b32 s0, 8 1201; GFX10-NEXT: s_mov_b32 s1, 16 1202; GFX10-NEXT: s_waitcnt vmcnt(0) 1203; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 1204; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1205; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v1 1206; GFX10-NEXT: v_lshlrev_b32_sdwa v5, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1207; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1208; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1209; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v3 1210; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 1211; GFX10-NEXT: v_and_or_b32 v1, v1, 0xff, v5 1212; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v4 1213; GFX10-NEXT: s_lshr_b32 s0, s2, 2 1214; GFX10-NEXT: v_or3_b32 v0, v0, v6, v2 1215; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1 1216; GFX10-NEXT: v_or3_b32 v1, v1, v7, v3 1217; GFX10-NEXT: s_and_b32 s0, s2, 3 1218; GFX10-NEXT: s_lshl_b32 s0, s0, 3 1219; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1220; GFX10-NEXT: v_lshrrev_b32_e32 v0, s0, v0 1221; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1222; GFX10-NEXT: ; return to shader part epilog 1223; 
1224; GFX11-LABEL: extractelement_vgpr_v8i8_sgpr_idx: 1225; GFX11: ; %bb.0: 1226; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off 1227; GFX11-NEXT: s_lshr_b32 s0, s2, 2 1228; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 1229; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1 1230; GFX11-NEXT: s_and_b32 s0, s2, 3 1231; GFX11-NEXT: s_lshl_b32 s0, s0, 3 1232; GFX11-NEXT: s_waitcnt vmcnt(0) 1233; GFX11-NEXT: v_bfe_u32 v5, v1, 8, 8 1234; GFX11-NEXT: v_lshrrev_b32_e32 v6, 24, v1 1235; GFX11-NEXT: v_bfe_u32 v7, v1, 16, 8 1236; GFX11-NEXT: v_bfe_u32 v3, v0, 8, 8 1237; GFX11-NEXT: v_lshrrev_b32_e32 v2, 24, v0 1238; GFX11-NEXT: v_lshlrev_b32_e32 v5, 8, v5 1239; GFX11-NEXT: v_lshlrev_b32_e32 v6, 24, v6 1240; GFX11-NEXT: v_lshlrev_b32_e32 v7, 16, v7 1241; GFX11-NEXT: v_bfe_u32 v4, v0, 16, 8 1242; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v2 1243; GFX11-NEXT: v_and_or_b32 v1, v1, 0xff, v5 1244; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 1245; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v4 1246; GFX11-NEXT: v_or3_b32 v1, v1, v7, v6 1247; GFX11-NEXT: v_lshlrev_b32_e32 v3, 8, v3 1248; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1249; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v3 1250; GFX11-NEXT: v_or3_b32 v0, v0, v4, v2 1251; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1252; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1253; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 1254; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1255; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1256; GFX11-NEXT: ; return to shader part epilog 1257 %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr 1258 %element = extractelement <8 x i8> %vector, i32 %idx 1259 ret i8 %element 1260} 1261 1262define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %idx) { 1263; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx: 1264; GFX9: ; %bb.0: 1265; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1266; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 1267; GFX9-NEXT: s_mov_b32 s4, 8 1268; GFX9-NEXT: s_mov_b32 s5, 16 1269; GFX9-NEXT: s_movk_i32 s6, 0xff 1270; GFX9-NEXT: v_lshrrev_b32_e32 v3, 2, v2 1271; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 1272; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 1273; GFX9-NEXT: s_waitcnt vmcnt(0) 1274; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 1275; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 1276; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1277; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1278; GFX9-NEXT: v_lshlrev_b32_sdwa v7, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1279; GFX9-NEXT: v_lshlrev_b32_sdwa v9, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1280; GFX9-NEXT: v_and_or_b32 v0, v0, s6, v6 1281; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 1282; GFX9-NEXT: v_and_or_b32 v1, v1, s6, v8 1283; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5 1284; GFX9-NEXT: v_or3_b32 v0, v0, v7, v4 1285; GFX9-NEXT: v_or3_b32 v1, v1, v9, v5 1286; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1287; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2 1288; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 1289; GFX9-NEXT: s_setpc_b64 s[30:31] 1290; 1291; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx: 1292; GFX8: ; %bb.0: 1293; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1294; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 1295; GFX8-NEXT: v_mov_b32_e32 v3, 8 1296; GFX8-NEXT: v_mov_b32_e32 v4, 16 1297; GFX8-NEXT: v_lshrrev_b32_e32 v5, 2, v2 1298; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 1299; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5 1300; GFX8-NEXT: s_waitcnt vmcnt(0) 1301; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1302; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:DWORD src1_sel:BYTE_1 1303; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v0 1304; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 1305; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1306; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1307; GFX8-NEXT: v_or_b32_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1308; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1309; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v6 1310; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v7 1311; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 1312; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 1313; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 1314; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 1315; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1316; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2 1317; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 1318; GFX8-NEXT: s_setpc_b64 s[30:31] 1319; 1320; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx: 1321; GFX7: ; %bb.0: 1322; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1323; GFX7-NEXT: s_mov_b32 s6, 0 1324; GFX7-NEXT: s_mov_b32 s7, 0xf000 1325; GFX7-NEXT: s_mov_b64 s[4:5], 0 1326; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 1327; GFX7-NEXT: v_lshrrev_b32_e32 v3, 2, v2 1328; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 1329; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 1330; GFX7-NEXT: s_waitcnt vmcnt(0) 1331; GFX7-NEXT: v_bfe_u32 v7, v0, 8, 8 1332; GFX7-NEXT: v_bfe_u32 v9, v1, 8, 8 1333; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 1334; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v1 1335; GFX7-NEXT: v_and_b32_e32 v6, 0xff, v0 1336; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 1337; GFX7-NEXT: v_and_b32_e32 v8, 0xff, v1 1338; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 1339; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 1340; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 1341; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1342; GFX7-NEXT: v_lshlrev_b32_e32 
v1, 16, v1 1343; GFX7-NEXT: v_or_b32_e32 v6, v6, v7 1344; GFX7-NEXT: v_or_b32_e32 v7, v8, v9 1345; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 1346; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 1347; GFX7-NEXT: v_or_b32_e32 v0, v6, v0 1348; GFX7-NEXT: v_or_b32_e32 v1, v7, v1 1349; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 1350; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 1351; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1352; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v2 1353; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 1354; GFX7-NEXT: s_setpc_b64 s[30:31] 1355; 1356; GFX10-LABEL: extractelement_vgpr_v8i8_vgpr_idx: 1357; GFX10: ; %bb.0: 1358; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1359; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1360; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 1361; GFX10-NEXT: s_mov_b32 s4, 8 1362; GFX10-NEXT: s_mov_b32 s5, 16 1363; GFX10-NEXT: s_waitcnt vmcnt(0) 1364; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 1365; GFX10-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1366; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 1367; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1368; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1369; GFX10-NEXT: v_lshlrev_b32_sdwa v8, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1370; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v4 1371; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1372; GFX10-NEXT: v_and_or_b32 v1, v1, 0xff, v6 1373; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v5 1374; GFX10-NEXT: v_lshrrev_b32_e32 v5, 2, v2 1375; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 1376; GFX10-NEXT: v_or3_b32 v0, v0, v7, v3 1377; GFX10-NEXT: v_or3_b32 v1, v1, v8, v4 1378; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5 1379; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1380; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v2 1381; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0 1382; 
GFX10-NEXT: s_setpc_b64 s[30:31] 1383; 1384; GFX11-LABEL: extractelement_vgpr_v8i8_vgpr_idx: 1385; GFX11: ; %bb.0: 1386; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1387; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1388; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off 1389; GFX11-NEXT: s_waitcnt vmcnt(0) 1390; GFX11-NEXT: v_bfe_u32 v4, v0, 8, 8 1391; GFX11-NEXT: v_bfe_u32 v6, v1, 8, 8 1392; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1 1393; GFX11-NEXT: v_bfe_u32 v8, v1, 16, 8 1394; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 1395; GFX11-NEXT: v_lshlrev_b32_e32 v4, 8, v4 1396; GFX11-NEXT: v_lshlrev_b32_e32 v6, 8, v6 1397; GFX11-NEXT: v_bfe_u32 v5, v0, 16, 8 1398; GFX11-NEXT: v_lshlrev_b32_e32 v8, 16, v8 1399; GFX11-NEXT: v_lshlrev_b32_e32 v7, 24, v7 1400; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v4 1401; GFX11-NEXT: v_and_or_b32 v1, v1, 0xff, v6 1402; GFX11-NEXT: v_lshrrev_b32_e32 v4, 2, v2 1403; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v5 1404; GFX11-NEXT: v_and_b32_e32 v2, 3, v2 1405; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1406; GFX11-NEXT: v_or3_b32 v1, v1, v8, v7 1407; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4 1408; GFX11-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1409; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1410; GFX11-NEXT: v_or3_b32 v0, v0, v5, v3 1411; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_lshlrev_b32 v1, 3, v2 1412; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1413; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 1414; GFX11-NEXT: s_setpc_b64 s[30:31] 1415 %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr 1416 %element = extractelement <8 x i8> %vector, i32 %idx 1417 ret i8 %element 1418} 1419 1420define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 %idx) { 1421; GCN-LABEL: extractelement_sgpr_v8i8_vgpr_idx: 1422; GCN: ; %bb.0: 1423; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1424; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0 1425; GCN-NEXT: 
v_cmp_eq_u32_e32 vcc, 1, v1 1426; GCN-NEXT: v_and_b32_e32 v0, 3, v0 1427; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 1428; GCN-NEXT: s_waitcnt lgkmcnt(0) 1429; GCN-NEXT: s_bfe_u32 s5, s0, 0x80008 1430; GCN-NEXT: s_lshr_b32 s2, s0, 24 1431; GCN-NEXT: s_and_b32 s4, s0, 0xff 1432; GCN-NEXT: s_lshl_b32 s5, s5, 8 1433; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 1434; GCN-NEXT: s_or_b32 s4, s4, s5 1435; GCN-NEXT: s_lshl_b32 s0, s0, 16 1436; GCN-NEXT: s_or_b32 s0, s4, s0 1437; GCN-NEXT: s_lshl_b32 s2, s2, 24 1438; GCN-NEXT: s_bfe_u32 s4, s1, 0x80008 1439; GCN-NEXT: s_lshr_b32 s3, s1, 24 1440; GCN-NEXT: s_or_b32 s0, s0, s2 1441; GCN-NEXT: s_and_b32 s2, s1, 0xff 1442; GCN-NEXT: s_lshl_b32 s4, s4, 8 1443; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 1444; GCN-NEXT: s_or_b32 s2, s2, s4 1445; GCN-NEXT: s_lshl_b32 s1, s1, 16 1446; GCN-NEXT: s_or_b32 s1, s2, s1 1447; GCN-NEXT: s_lshl_b32 s2, s3, 24 1448; GCN-NEXT: s_or_b32 s1, s1, s2 1449; GCN-NEXT: v_mov_b32_e32 v2, s0 1450; GCN-NEXT: v_mov_b32_e32 v3, s1 1451; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 1452; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1 1453; GCN-NEXT: v_readfirstlane_b32 s0, v0 1454; GCN-NEXT: ; return to shader part epilog 1455; 1456; GFX10-LABEL: extractelement_sgpr_v8i8_vgpr_idx: 1457; GFX10: ; %bb.0: 1458; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1459; GFX10-NEXT: v_lshrrev_b32_e32 v1, 2, v0 1460; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 1461; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 1462; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 1463; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1464; GFX10-NEXT: s_bfe_u32 s7, s1, 0x80008 1465; GFX10-NEXT: s_lshr_b32 s3, s1, 24 1466; GFX10-NEXT: s_and_b32 s6, s1, 0xff 1467; GFX10-NEXT: s_bfe_u32 s1, s1, 0x80010 1468; GFX10-NEXT: s_lshl_b32 s7, s7, 8 1469; GFX10-NEXT: s_lshl_b32 s1, s1, 16 1470; GFX10-NEXT: s_or_b32 s6, s6, s7 1471; GFX10-NEXT: s_bfe_u32 s5, s0, 0x80008 1472; GFX10-NEXT: s_lshl_b32 s3, s3, 24 1473; GFX10-NEXT: s_or_b32 s1, s6, s1 1474; GFX10-NEXT: s_lshr_b32 s2, s0, 24 1475; GFX10-NEXT: 
s_and_b32 s4, s0, 0xff 1476; GFX10-NEXT: s_bfe_u32 s0, s0, 0x80010 1477; GFX10-NEXT: s_lshl_b32 s5, s5, 8 1478; GFX10-NEXT: s_or_b32 s1, s1, s3 1479; GFX10-NEXT: s_lshl_b32 s0, s0, 16 1480; GFX10-NEXT: s_or_b32 s3, s4, s5 1481; GFX10-NEXT: v_mov_b32_e32 v2, s1 1482; GFX10-NEXT: s_lshl_b32 s2, s2, 24 1483; GFX10-NEXT: s_or_b32 s0, s3, s0 1484; GFX10-NEXT: s_or_b32 s0, s0, s2 1485; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v2, vcc_lo 1486; GFX10-NEXT: v_lshrrev_b32_e32 v0, v0, v1 1487; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1488; GFX10-NEXT: ; return to shader part epilog 1489; 1490; GFX11-LABEL: extractelement_sgpr_v8i8_vgpr_idx: 1491; GFX11: ; %bb.0: 1492; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1493; GFX11-NEXT: v_lshrrev_b32_e32 v1, 2, v0 1494; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1495; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 1496; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1497; GFX11-NEXT: s_bfe_u32 s7, s1, 0x80008 1498; GFX11-NEXT: s_lshr_b32 s3, s1, 24 1499; GFX11-NEXT: s_and_b32 s6, s1, 0xff 1500; GFX11-NEXT: s_bfe_u32 s1, s1, 0x80010 1501; GFX11-NEXT: s_lshl_b32 s7, s7, 8 1502; GFX11-NEXT: s_bfe_u32 s5, s0, 0x80008 1503; GFX11-NEXT: s_lshl_b32 s1, s1, 16 1504; GFX11-NEXT: s_or_b32 s6, s6, s7 1505; GFX11-NEXT: s_lshr_b32 s2, s0, 24 1506; GFX11-NEXT: s_and_b32 s4, s0, 0xff 1507; GFX11-NEXT: s_bfe_u32 s0, s0, 0x80010 1508; GFX11-NEXT: s_lshl_b32 s5, s5, 8 1509; GFX11-NEXT: s_lshl_b32 s3, s3, 24 1510; GFX11-NEXT: s_or_b32 s1, s6, s1 1511; GFX11-NEXT: s_lshl_b32 s0, s0, 16 1512; GFX11-NEXT: s_or_b32 s1, s1, s3 1513; GFX11-NEXT: s_or_b32 s3, s4, s5 1514; GFX11-NEXT: s_lshl_b32 s2, s2, 24 1515; GFX11-NEXT: s_or_b32 s0, s3, s0 1516; GFX11-NEXT: v_mov_b32_e32 v2, s1 1517; GFX11-NEXT: s_or_b32 s0, s0, s2 1518; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 1519; GFX11-NEXT: v_dual_cndmask_b32 v1, s0, v2 :: v_dual_and_b32 v0, 3, v0 1520; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1521; GFX11-NEXT: 
v_lshlrev_b32_e32 v0, 3, v0 1522; GFX11-NEXT: v_lshrrev_b32_e32 v0, v0, v1 1523; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1524; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1525; GFX11-NEXT: ; return to shader part epilog 1526 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1527 %element = extractelement <8 x i8> %vector, i32 %idx 1528 ret i8 %element 1529} 1530 1531define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg %ptr) { 1532; GCN-LABEL: extractelement_sgpr_v8i8_idx0: 1533; GCN: ; %bb.0: 1534; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1535; GCN-NEXT: s_waitcnt lgkmcnt(0) 1536; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 1537; GCN-NEXT: s_lshr_b32 s1, s0, 24 1538; GCN-NEXT: s_and_b32 s2, s0, 0xff 1539; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 1540; GCN-NEXT: s_lshl_b32 s3, s3, 8 1541; GCN-NEXT: s_or_b32 s2, s2, s3 1542; GCN-NEXT: s_lshl_b32 s0, s0, 16 1543; GCN-NEXT: s_or_b32 s0, s2, s0 1544; GCN-NEXT: s_lshl_b32 s1, s1, 24 1545; GCN-NEXT: s_or_b32 s0, s0, s1 1546; GCN-NEXT: ; return to shader part epilog 1547; 1548; GFX10-LABEL: extractelement_sgpr_v8i8_idx0: 1549; GFX10: ; %bb.0: 1550; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1551; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1552; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 1553; GFX10-NEXT: s_and_b32 s1, s0, 0xff 1554; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 1555; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1556; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1557; GFX10-NEXT: s_or_b32 s1, s1, s2 1558; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1559; GFX10-NEXT: s_or_b32 s1, s1, s3 1560; GFX10-NEXT: s_lshl_b32 s0, s0, 24 1561; GFX10-NEXT: s_or_b32 s0, s1, s0 1562; GFX10-NEXT: ; return to shader part epilog 1563; 1564; GFX11-LABEL: extractelement_sgpr_v8i8_idx0: 1565; GFX11: ; %bb.0: 1566; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1567; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1568; GFX11-NEXT: s_bfe_u32 s2, s0, 0x80008 1569; GFX11-NEXT: s_and_b32 s1, s0, 0xff 1570; GFX11-NEXT: s_bfe_u32 s3, s0, 0x80010 1571; GFX11-NEXT: s_lshl_b32 s2, s2, 8 
1572; GFX11-NEXT: s_lshl_b32 s3, s3, 16 1573; GFX11-NEXT: s_or_b32 s1, s1, s2 1574; GFX11-NEXT: s_lshr_b32 s0, s0, 24 1575; GFX11-NEXT: s_or_b32 s1, s1, s3 1576; GFX11-NEXT: s_lshl_b32 s0, s0, 24 1577; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1578; GFX11-NEXT: s_or_b32 s0, s1, s0 1579; GFX11-NEXT: ; return to shader part epilog 1580 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1581 %element = extractelement <8 x i8> %vector, i32 0 1582 ret i8 %element 1583} 1584 1585define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg %ptr) { 1586; GCN-LABEL: extractelement_sgpr_v8i8_idx1: 1587; GCN: ; %bb.0: 1588; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1589; GCN-NEXT: s_waitcnt lgkmcnt(0) 1590; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 1591; GCN-NEXT: s_lshr_b32 s1, s0, 24 1592; GCN-NEXT: s_and_b32 s2, s0, 0xff 1593; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 1594; GCN-NEXT: s_lshl_b32 s3, s3, 8 1595; GCN-NEXT: s_or_b32 s2, s2, s3 1596; GCN-NEXT: s_lshl_b32 s0, s0, 16 1597; GCN-NEXT: s_or_b32 s0, s2, s0 1598; GCN-NEXT: s_lshl_b32 s1, s1, 24 1599; GCN-NEXT: s_or_b32 s0, s0, s1 1600; GCN-NEXT: s_lshr_b32 s0, s0, 8 1601; GCN-NEXT: ; return to shader part epilog 1602; 1603; GFX10-LABEL: extractelement_sgpr_v8i8_idx1: 1604; GFX10: ; %bb.0: 1605; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1606; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1607; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 1608; GFX10-NEXT: s_and_b32 s1, s0, 0xff 1609; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 1610; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1611; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1612; GFX10-NEXT: s_or_b32 s1, s1, s2 1613; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1614; GFX10-NEXT: s_or_b32 s1, s1, s3 1615; GFX10-NEXT: s_lshl_b32 s0, s0, 24 1616; GFX10-NEXT: s_or_b32 s0, s1, s0 1617; GFX10-NEXT: s_lshr_b32 s0, s0, 8 1618; GFX10-NEXT: ; return to shader part epilog 1619; 1620; GFX11-LABEL: extractelement_sgpr_v8i8_idx1: 1621; GFX11: ; %bb.0: 1622; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1623; GFX11-NEXT: 
s_waitcnt lgkmcnt(0) 1624; GFX11-NEXT: s_bfe_u32 s2, s0, 0x80008 1625; GFX11-NEXT: s_and_b32 s1, s0, 0xff 1626; GFX11-NEXT: s_bfe_u32 s3, s0, 0x80010 1627; GFX11-NEXT: s_lshl_b32 s2, s2, 8 1628; GFX11-NEXT: s_lshl_b32 s3, s3, 16 1629; GFX11-NEXT: s_or_b32 s1, s1, s2 1630; GFX11-NEXT: s_lshr_b32 s0, s0, 24 1631; GFX11-NEXT: s_or_b32 s1, s1, s3 1632; GFX11-NEXT: s_lshl_b32 s0, s0, 24 1633; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1634; GFX11-NEXT: s_or_b32 s0, s1, s0 1635; GFX11-NEXT: s_lshr_b32 s0, s0, 8 1636; GFX11-NEXT: ; return to shader part epilog 1637 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1638 %element = extractelement <8 x i8> %vector, i32 1 1639 ret i8 %element 1640} 1641 1642define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg %ptr) { 1643; GCN-LABEL: extractelement_sgpr_v8i8_idx2: 1644; GCN: ; %bb.0: 1645; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1646; GCN-NEXT: s_waitcnt lgkmcnt(0) 1647; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 1648; GCN-NEXT: s_lshr_b32 s1, s0, 24 1649; GCN-NEXT: s_and_b32 s2, s0, 0xff 1650; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 1651; GCN-NEXT: s_lshl_b32 s3, s3, 8 1652; GCN-NEXT: s_or_b32 s2, s2, s3 1653; GCN-NEXT: s_lshl_b32 s0, s0, 16 1654; GCN-NEXT: s_or_b32 s0, s2, s0 1655; GCN-NEXT: s_lshl_b32 s1, s1, 24 1656; GCN-NEXT: s_or_b32 s0, s0, s1 1657; GCN-NEXT: s_lshr_b32 s0, s0, 16 1658; GCN-NEXT: ; return to shader part epilog 1659; 1660; GFX10-LABEL: extractelement_sgpr_v8i8_idx2: 1661; GFX10: ; %bb.0: 1662; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1663; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1664; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 1665; GFX10-NEXT: s_and_b32 s1, s0, 0xff 1666; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 1667; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1668; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1669; GFX10-NEXT: s_or_b32 s1, s1, s2 1670; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1671; GFX10-NEXT: s_or_b32 s1, s1, s3 1672; GFX10-NEXT: s_lshl_b32 s0, s0, 24 
1673; GFX10-NEXT: s_or_b32 s0, s1, s0 1674; GFX10-NEXT: s_lshr_b32 s0, s0, 16 1675; GFX10-NEXT: ; return to shader part epilog 1676; 1677; GFX11-LABEL: extractelement_sgpr_v8i8_idx2: 1678; GFX11: ; %bb.0: 1679; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1680; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1681; GFX11-NEXT: s_bfe_u32 s2, s0, 0x80008 1682; GFX11-NEXT: s_and_b32 s1, s0, 0xff 1683; GFX11-NEXT: s_bfe_u32 s3, s0, 0x80010 1684; GFX11-NEXT: s_lshl_b32 s2, s2, 8 1685; GFX11-NEXT: s_lshl_b32 s3, s3, 16 1686; GFX11-NEXT: s_or_b32 s1, s1, s2 1687; GFX11-NEXT: s_lshr_b32 s0, s0, 24 1688; GFX11-NEXT: s_or_b32 s1, s1, s3 1689; GFX11-NEXT: s_lshl_b32 s0, s0, 24 1690; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1691; GFX11-NEXT: s_or_b32 s0, s1, s0 1692; GFX11-NEXT: s_lshr_b32 s0, s0, 16 1693; GFX11-NEXT: ; return to shader part epilog 1694 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1695 %element = extractelement <8 x i8> %vector, i32 2 1696 ret i8 %element 1697} 1698 1699define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg %ptr) { 1700; GCN-LABEL: extractelement_sgpr_v8i8_idx3: 1701; GCN: ; %bb.0: 1702; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1703; GCN-NEXT: s_waitcnt lgkmcnt(0) 1704; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 1705; GCN-NEXT: s_lshr_b32 s1, s0, 24 1706; GCN-NEXT: s_and_b32 s2, s0, 0xff 1707; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 1708; GCN-NEXT: s_lshl_b32 s3, s3, 8 1709; GCN-NEXT: s_or_b32 s2, s2, s3 1710; GCN-NEXT: s_lshl_b32 s0, s0, 16 1711; GCN-NEXT: s_or_b32 s0, s2, s0 1712; GCN-NEXT: s_lshl_b32 s1, s1, 24 1713; GCN-NEXT: s_or_b32 s0, s0, s1 1714; GCN-NEXT: s_lshr_b32 s0, s0, 24 1715; GCN-NEXT: ; return to shader part epilog 1716; 1717; GFX10-LABEL: extractelement_sgpr_v8i8_idx3: 1718; GFX10: ; %bb.0: 1719; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1720; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1721; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 1722; GFX10-NEXT: s_and_b32 s1, s0, 0xff 1723; 
GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 1724; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1725; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1726; GFX10-NEXT: s_or_b32 s1, s1, s2 1727; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1728; GFX10-NEXT: s_or_b32 s1, s1, s3 1729; GFX10-NEXT: s_lshl_b32 s0, s0, 24 1730; GFX10-NEXT: s_or_b32 s0, s1, s0 1731; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1732; GFX10-NEXT: ; return to shader part epilog 1733; 1734; GFX11-LABEL: extractelement_sgpr_v8i8_idx3: 1735; GFX11: ; %bb.0: 1736; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1737; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1738; GFX11-NEXT: s_bfe_u32 s2, s0, 0x80008 1739; GFX11-NEXT: s_and_b32 s1, s0, 0xff 1740; GFX11-NEXT: s_bfe_u32 s3, s0, 0x80010 1741; GFX11-NEXT: s_lshl_b32 s2, s2, 8 1742; GFX11-NEXT: s_lshl_b32 s3, s3, 16 1743; GFX11-NEXT: s_or_b32 s1, s1, s2 1744; GFX11-NEXT: s_lshr_b32 s0, s0, 24 1745; GFX11-NEXT: s_or_b32 s1, s1, s3 1746; GFX11-NEXT: s_lshl_b32 s0, s0, 24 1747; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1748; GFX11-NEXT: s_or_b32 s0, s1, s0 1749; GFX11-NEXT: s_lshr_b32 s0, s0, 24 1750; GFX11-NEXT: ; return to shader part epilog 1751 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1752 %element = extractelement <8 x i8> %vector, i32 3 1753 ret i8 %element 1754} 1755 1756define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg %ptr) { 1757; GCN-LABEL: extractelement_sgpr_v8i8_idx4: 1758; GCN: ; %bb.0: 1759; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1760; GCN-NEXT: s_waitcnt lgkmcnt(0) 1761; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 1762; GCN-NEXT: s_lshr_b32 s0, s1, 24 1763; GCN-NEXT: s_and_b32 s2, s1, 0xff 1764; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 1765; GCN-NEXT: s_lshl_b32 s3, s3, 8 1766; GCN-NEXT: s_or_b32 s2, s2, s3 1767; GCN-NEXT: s_lshl_b32 s1, s1, 16 1768; GCN-NEXT: s_or_b32 s1, s2, s1 1769; GCN-NEXT: s_lshl_b32 s0, s0, 24 1770; GCN-NEXT: s_or_b32 s0, s1, s0 1771; GCN-NEXT: ; return to shader part epilog 1772; 1773; GFX10-LABEL: 
extractelement_sgpr_v8i8_idx4: 1774; GFX10: ; %bb.0: 1775; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1776; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1777; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 1778; GFX10-NEXT: s_and_b32 s0, s1, 0xff 1779; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 1780; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1781; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1782; GFX10-NEXT: s_or_b32 s0, s0, s2 1783; GFX10-NEXT: s_lshr_b32 s1, s1, 24 1784; GFX10-NEXT: s_or_b32 s0, s0, s3 1785; GFX10-NEXT: s_lshl_b32 s1, s1, 24 1786; GFX10-NEXT: s_or_b32 s0, s0, s1 1787; GFX10-NEXT: ; return to shader part epilog 1788; 1789; GFX11-LABEL: extractelement_sgpr_v8i8_idx4: 1790; GFX11: ; %bb.0: 1791; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1792; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1793; GFX11-NEXT: s_bfe_u32 s2, s1, 0x80008 1794; GFX11-NEXT: s_and_b32 s0, s1, 0xff 1795; GFX11-NEXT: s_bfe_u32 s3, s1, 0x80010 1796; GFX11-NEXT: s_lshl_b32 s2, s2, 8 1797; GFX11-NEXT: s_lshl_b32 s3, s3, 16 1798; GFX11-NEXT: s_or_b32 s0, s0, s2 1799; GFX11-NEXT: s_lshr_b32 s1, s1, 24 1800; GFX11-NEXT: s_or_b32 s0, s0, s3 1801; GFX11-NEXT: s_lshl_b32 s1, s1, 24 1802; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1803; GFX11-NEXT: s_or_b32 s0, s0, s1 1804; GFX11-NEXT: ; return to shader part epilog 1805 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1806 %element = extractelement <8 x i8> %vector, i32 4 1807 ret i8 %element 1808} 1809 1810define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg %ptr) { 1811; GCN-LABEL: extractelement_sgpr_v8i8_idx5: 1812; GCN: ; %bb.0: 1813; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1814; GCN-NEXT: s_waitcnt lgkmcnt(0) 1815; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 1816; GCN-NEXT: s_lshr_b32 s0, s1, 24 1817; GCN-NEXT: s_and_b32 s2, s1, 0xff 1818; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 1819; GCN-NEXT: s_lshl_b32 s3, s3, 8 1820; GCN-NEXT: s_or_b32 s2, s2, s3 1821; GCN-NEXT: s_lshl_b32 s1, s1, 16 1822; GCN-NEXT: s_or_b32 s1, s2, s1 1823; GCN-NEXT: s_lshl_b32 s0, 
s0, 24 1824; GCN-NEXT: s_or_b32 s0, s1, s0 1825; GCN-NEXT: s_lshr_b32 s0, s0, 8 1826; GCN-NEXT: ; return to shader part epilog 1827; 1828; GFX10-LABEL: extractelement_sgpr_v8i8_idx5: 1829; GFX10: ; %bb.0: 1830; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1831; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1832; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 1833; GFX10-NEXT: s_and_b32 s0, s1, 0xff 1834; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 1835; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1836; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1837; GFX10-NEXT: s_or_b32 s0, s0, s2 1838; GFX10-NEXT: s_lshr_b32 s1, s1, 24 1839; GFX10-NEXT: s_or_b32 s0, s0, s3 1840; GFX10-NEXT: s_lshl_b32 s1, s1, 24 1841; GFX10-NEXT: s_or_b32 s0, s0, s1 1842; GFX10-NEXT: s_lshr_b32 s0, s0, 8 1843; GFX10-NEXT: ; return to shader part epilog 1844; 1845; GFX11-LABEL: extractelement_sgpr_v8i8_idx5: 1846; GFX11: ; %bb.0: 1847; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1848; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1849; GFX11-NEXT: s_bfe_u32 s2, s1, 0x80008 1850; GFX11-NEXT: s_and_b32 s0, s1, 0xff 1851; GFX11-NEXT: s_bfe_u32 s3, s1, 0x80010 1852; GFX11-NEXT: s_lshl_b32 s2, s2, 8 1853; GFX11-NEXT: s_lshl_b32 s3, s3, 16 1854; GFX11-NEXT: s_or_b32 s0, s0, s2 1855; GFX11-NEXT: s_lshr_b32 s1, s1, 24 1856; GFX11-NEXT: s_or_b32 s0, s0, s3 1857; GFX11-NEXT: s_lshl_b32 s1, s1, 24 1858; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1859; GFX11-NEXT: s_or_b32 s0, s0, s1 1860; GFX11-NEXT: s_lshr_b32 s0, s0, 8 1861; GFX11-NEXT: ; return to shader part epilog 1862 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1863 %element = extractelement <8 x i8> %vector, i32 5 1864 ret i8 %element 1865} 1866 1867define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg %ptr) { 1868; GCN-LABEL: extractelement_sgpr_v8i8_idx6: 1869; GCN: ; %bb.0: 1870; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1871; GCN-NEXT: s_waitcnt lgkmcnt(0) 1872; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 1873; GCN-NEXT: s_lshr_b32 
s0, s1, 24 1874; GCN-NEXT: s_and_b32 s2, s1, 0xff 1875; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 1876; GCN-NEXT: s_lshl_b32 s3, s3, 8 1877; GCN-NEXT: s_or_b32 s2, s2, s3 1878; GCN-NEXT: s_lshl_b32 s1, s1, 16 1879; GCN-NEXT: s_or_b32 s1, s2, s1 1880; GCN-NEXT: s_lshl_b32 s0, s0, 24 1881; GCN-NEXT: s_or_b32 s0, s1, s0 1882; GCN-NEXT: s_lshr_b32 s0, s0, 16 1883; GCN-NEXT: ; return to shader part epilog 1884; 1885; GFX10-LABEL: extractelement_sgpr_v8i8_idx6: 1886; GFX10: ; %bb.0: 1887; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1888; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1889; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 1890; GFX10-NEXT: s_and_b32 s0, s1, 0xff 1891; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 1892; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1893; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1894; GFX10-NEXT: s_or_b32 s0, s0, s2 1895; GFX10-NEXT: s_lshr_b32 s1, s1, 24 1896; GFX10-NEXT: s_or_b32 s0, s0, s3 1897; GFX10-NEXT: s_lshl_b32 s1, s1, 24 1898; GFX10-NEXT: s_or_b32 s0, s0, s1 1899; GFX10-NEXT: s_lshr_b32 s0, s0, 16 1900; GFX10-NEXT: ; return to shader part epilog 1901; 1902; GFX11-LABEL: extractelement_sgpr_v8i8_idx6: 1903; GFX11: ; %bb.0: 1904; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1905; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1906; GFX11-NEXT: s_bfe_u32 s2, s1, 0x80008 1907; GFX11-NEXT: s_and_b32 s0, s1, 0xff 1908; GFX11-NEXT: s_bfe_u32 s3, s1, 0x80010 1909; GFX11-NEXT: s_lshl_b32 s2, s2, 8 1910; GFX11-NEXT: s_lshl_b32 s3, s3, 16 1911; GFX11-NEXT: s_or_b32 s0, s0, s2 1912; GFX11-NEXT: s_lshr_b32 s1, s1, 24 1913; GFX11-NEXT: s_or_b32 s0, s0, s3 1914; GFX11-NEXT: s_lshl_b32 s1, s1, 24 1915; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1916; GFX11-NEXT: s_or_b32 s0, s0, s1 1917; GFX11-NEXT: s_lshr_b32 s0, s0, 16 1918; GFX11-NEXT: ; return to shader part epilog 1919 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1920 %element = extractelement <8 x i8> %vector, i32 6 1921 ret i8 %element 1922} 1923 1924define amdgpu_ps i8 
@extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg %ptr) { 1925; GCN-LABEL: extractelement_sgpr_v8i8_idx7: 1926; GCN: ; %bb.0: 1927; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1928; GCN-NEXT: s_waitcnt lgkmcnt(0) 1929; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 1930; GCN-NEXT: s_lshr_b32 s0, s1, 24 1931; GCN-NEXT: s_and_b32 s2, s1, 0xff 1932; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 1933; GCN-NEXT: s_lshl_b32 s3, s3, 8 1934; GCN-NEXT: s_or_b32 s2, s2, s3 1935; GCN-NEXT: s_lshl_b32 s1, s1, 16 1936; GCN-NEXT: s_or_b32 s1, s2, s1 1937; GCN-NEXT: s_lshl_b32 s0, s0, 24 1938; GCN-NEXT: s_or_b32 s0, s1, s0 1939; GCN-NEXT: s_lshr_b32 s0, s0, 24 1940; GCN-NEXT: ; return to shader part epilog 1941; 1942; GFX10-LABEL: extractelement_sgpr_v8i8_idx7: 1943; GFX10: ; %bb.0: 1944; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1945; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1946; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 1947; GFX10-NEXT: s_and_b32 s0, s1, 0xff 1948; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 1949; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1950; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1951; GFX10-NEXT: s_or_b32 s0, s0, s2 1952; GFX10-NEXT: s_lshr_b32 s1, s1, 24 1953; GFX10-NEXT: s_or_b32 s0, s0, s3 1954; GFX10-NEXT: s_lshl_b32 s1, s1, 24 1955; GFX10-NEXT: s_or_b32 s0, s0, s1 1956; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1957; GFX10-NEXT: ; return to shader part epilog 1958; 1959; GFX11-LABEL: extractelement_sgpr_v8i8_idx7: 1960; GFX11: ; %bb.0: 1961; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1962; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1963; GFX11-NEXT: s_bfe_u32 s2, s1, 0x80008 1964; GFX11-NEXT: s_and_b32 s0, s1, 0xff 1965; GFX11-NEXT: s_bfe_u32 s3, s1, 0x80010 1966; GFX11-NEXT: s_lshl_b32 s2, s2, 8 1967; GFX11-NEXT: s_lshl_b32 s3, s3, 16 1968; GFX11-NEXT: s_or_b32 s0, s0, s2 1969; GFX11-NEXT: s_lshr_b32 s1, s1, 24 1970; GFX11-NEXT: s_or_b32 s0, s0, s3 1971; GFX11-NEXT: s_lshl_b32 s1, s1, 24 1972; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1973; GFX11-NEXT: 
s_or_b32 s0, s0, s1 1974; GFX11-NEXT: s_lshr_b32 s0, s0, 24 1975; GFX11-NEXT: ; return to shader part epilog 1976 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1977 %element = extractelement <8 x i8> %vector, i32 7 1978 ret i8 %element 1979} 1980 1981define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) { 1982; GFX9-LABEL: extractelement_vgpr_v8i8_idx0: 1983; GFX9: ; %bb.0: 1984; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1985; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 1986; GFX9-NEXT: v_mov_b32_e32 v2, 8 1987; GFX9-NEXT: s_waitcnt vmcnt(0) 1988; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 1989; GFX9-NEXT: v_mov_b32_e32 v3, 16 1990; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 1991; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1992; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1993; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 1994; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 1995; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 1996; GFX9-NEXT: s_setpc_b64 s[30:31] 1997; 1998; GFX8-LABEL: extractelement_vgpr_v8i8_idx0: 1999; GFX8: ; %bb.0: 2000; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2001; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 2002; GFX8-NEXT: s_waitcnt vmcnt(0) 2003; GFX8-NEXT: v_mov_b32_e32 v1, 8 2004; GFX8-NEXT: v_mov_b32_e32 v2, 16 2005; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2006; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2007; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2008; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2009; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2010; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2011; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2012; GFX8-NEXT: s_setpc_b64 s[30:31] 2013; 2014; GFX7-LABEL: 
extractelement_vgpr_v8i8_idx0: 2015; GFX7: ; %bb.0: 2016; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2017; GFX7-NEXT: s_mov_b32 s6, 0 2018; GFX7-NEXT: s_mov_b32 s7, 0xf000 2019; GFX7-NEXT: s_mov_b64 s[4:5], 0 2020; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 2021; GFX7-NEXT: s_waitcnt vmcnt(0) 2022; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 2023; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 2024; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 2025; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 2026; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 2027; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2028; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 2029; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 2030; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2031; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2032; GFX7-NEXT: s_setpc_b64 s[30:31] 2033; 2034; GFX10-LABEL: extractelement_vgpr_v8i8_idx0: 2035; GFX10: ; %bb.0: 2036; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2037; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2038; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 2039; GFX10-NEXT: s_waitcnt vmcnt(0) 2040; GFX10-NEXT: v_mov_b32_e32 v1, 8 2041; GFX10-NEXT: v_mov_b32_e32 v2, 16 2042; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2043; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2044; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2045; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 2046; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2047; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 2048; GFX10-NEXT: s_setpc_b64 s[30:31] 2049; 2050; GFX11-LABEL: extractelement_vgpr_v8i8_idx0: 2051; GFX11: ; %bb.0: 2052; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2053; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2054; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off 2055; GFX11-NEXT: s_waitcnt vmcnt(0) 2056; GFX11-NEXT: v_bfe_u32 v1, v0, 8, 8 2057; GFX11-NEXT: v_bfe_u32 v2, v0, 16, 8 2058; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 
2059; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 2060; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1 2061; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2062; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 2063; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1 2064; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2065; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2066; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 2067; GFX11-NEXT: s_setpc_b64 s[30:31] 2068 %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr 2069 %element = extractelement <8 x i8> %vector, i32 0 2070 ret i8 %element 2071} 2072 2073define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) { 2074; GFX9-LABEL: extractelement_vgpr_v8i8_idx1: 2075; GFX9: ; %bb.0: 2076; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2077; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 2078; GFX9-NEXT: s_mov_b32 s4, 8 2079; GFX9-NEXT: s_waitcnt vmcnt(0) 2080; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 2081; GFX9-NEXT: v_mov_b32_e32 v2, 16 2082; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2083; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2084; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2085; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v4 2086; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2087; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 2088; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2089; GFX9-NEXT: s_setpc_b64 s[30:31] 2090; 2091; GFX8-LABEL: extractelement_vgpr_v8i8_idx1: 2092; GFX8: ; %bb.0: 2093; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2094; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 2095; GFX8-NEXT: s_waitcnt vmcnt(0) 2096; GFX8-NEXT: v_mov_b32_e32 v1, 8 2097; GFX8-NEXT: v_mov_b32_e32 v2, 16 2098; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2099; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2100; 
GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2101; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2102; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2103; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2104; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2105; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2106; GFX8-NEXT: s_setpc_b64 s[30:31] 2107; 2108; GFX7-LABEL: extractelement_vgpr_v8i8_idx1: 2109; GFX7: ; %bb.0: 2110; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2111; GFX7-NEXT: s_mov_b32 s6, 0 2112; GFX7-NEXT: s_mov_b32 s7, 0xf000 2113; GFX7-NEXT: s_mov_b64 s[4:5], 0 2114; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 2115; GFX7-NEXT: s_waitcnt vmcnt(0) 2116; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 2117; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 2118; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 2119; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 2120; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 2121; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2122; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 2123; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 2124; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2125; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2126; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2127; GFX7-NEXT: s_setpc_b64 s[30:31] 2128; 2129; GFX10-LABEL: extractelement_vgpr_v8i8_idx1: 2130; GFX10: ; %bb.0: 2131; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2132; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2133; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 2134; GFX10-NEXT: s_mov_b32 s4, 8 2135; GFX10-NEXT: s_waitcnt vmcnt(0) 2136; GFX10-NEXT: v_mov_b32_e32 v1, 16 2137; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2138; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2139; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2140; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v2 2141; GFX10-NEXT: v_lshlrev_b32_e32 
v2, 24, v3 2142; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 2143; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2144; GFX10-NEXT: s_setpc_b64 s[30:31] 2145; 2146; GFX11-LABEL: extractelement_vgpr_v8i8_idx1: 2147; GFX11: ; %bb.0: 2148; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2149; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2150; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off 2151; GFX11-NEXT: s_waitcnt vmcnt(0) 2152; GFX11-NEXT: v_bfe_u32 v1, v0, 8, 8 2153; GFX11-NEXT: v_bfe_u32 v2, v0, 16, 8 2154; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2155; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 2156; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1 2157; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2158; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 2159; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1 2160; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2161; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2162; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 2163; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2164; GFX11-NEXT: s_setpc_b64 s[30:31] 2165 %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr 2166 %element = extractelement <8 x i8> %vector, i32 1 2167 ret i8 %element 2168} 2169 2170define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) { 2171; GFX9-LABEL: extractelement_vgpr_v8i8_idx2: 2172; GFX9: ; %bb.0: 2173; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2174; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 2175; GFX9-NEXT: v_mov_b32_e32 v2, 8 2176; GFX9-NEXT: s_mov_b32 s4, 16 2177; GFX9-NEXT: s_waitcnt vmcnt(0) 2178; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 2179; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2180; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2181; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2182; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 2183; 
GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2184; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 2185; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2186; GFX9-NEXT: s_setpc_b64 s[30:31] 2187; 2188; GFX8-LABEL: extractelement_vgpr_v8i8_idx2: 2189; GFX8: ; %bb.0: 2190; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2191; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 2192; GFX8-NEXT: s_waitcnt vmcnt(0) 2193; GFX8-NEXT: v_mov_b32_e32 v1, 8 2194; GFX8-NEXT: v_mov_b32_e32 v2, 16 2195; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2196; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2197; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2198; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2199; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2200; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2201; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2202; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2203; GFX8-NEXT: s_setpc_b64 s[30:31] 2204; 2205; GFX7-LABEL: extractelement_vgpr_v8i8_idx2: 2206; GFX7: ; %bb.0: 2207; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2208; GFX7-NEXT: s_mov_b32 s6, 0 2209; GFX7-NEXT: s_mov_b32 s7, 0xf000 2210; GFX7-NEXT: s_mov_b64 s[4:5], 0 2211; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 2212; GFX7-NEXT: s_waitcnt vmcnt(0) 2213; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 2214; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 2215; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 2216; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 2217; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 2218; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2219; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 2220; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 2221; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2222; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2223; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2224; GFX7-NEXT: s_setpc_b64 s[30:31] 2225; 2226; GFX10-LABEL: extractelement_vgpr_v8i8_idx2: 2227; GFX10: ; %bb.0: 
2228; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2229; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2230; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 2231; GFX10-NEXT: s_waitcnt vmcnt(0) 2232; GFX10-NEXT: v_mov_b32_e32 v1, 8 2233; GFX10-NEXT: s_mov_b32 s4, 16 2234; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2235; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 2236; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2237; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 2238; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 2239; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 2240; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2241; GFX10-NEXT: s_setpc_b64 s[30:31] 2242; 2243; GFX11-LABEL: extractelement_vgpr_v8i8_idx2: 2244; GFX11: ; %bb.0: 2245; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2246; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2247; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off 2248; GFX11-NEXT: s_waitcnt vmcnt(0) 2249; GFX11-NEXT: v_bfe_u32 v1, v0, 8, 8 2250; GFX11-NEXT: v_bfe_u32 v2, v0, 16, 8 2251; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2252; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 2253; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1 2254; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2255; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 2256; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1 2257; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2258; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2259; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 2260; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2261; GFX11-NEXT: s_setpc_b64 s[30:31] 2262 %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr 2263 %element = extractelement <8 x i8> %vector, i32 2 2264 ret i8 %element 2265} 2266 2267define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) { 2268; GFX9-LABEL: 
extractelement_vgpr_v8i8_idx3: 2269; GFX9: ; %bb.0: 2270; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2271; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 2272; GFX9-NEXT: v_mov_b32_e32 v2, 8 2273; GFX9-NEXT: s_waitcnt vmcnt(0) 2274; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 2275; GFX9-NEXT: v_mov_b32_e32 v3, 16 2276; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 2277; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2278; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2279; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 2280; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2281; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2282; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2283; GFX9-NEXT: s_setpc_b64 s[30:31] 2284; 2285; GFX8-LABEL: extractelement_vgpr_v8i8_idx3: 2286; GFX8: ; %bb.0: 2287; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2288; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 2289; GFX8-NEXT: s_waitcnt vmcnt(0) 2290; GFX8-NEXT: v_mov_b32_e32 v1, 8 2291; GFX8-NEXT: v_mov_b32_e32 v2, 16 2292; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2293; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2294; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2295; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2296; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2297; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2298; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2299; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2300; GFX8-NEXT: s_setpc_b64 s[30:31] 2301; 2302; GFX7-LABEL: extractelement_vgpr_v8i8_idx3: 2303; GFX7: ; %bb.0: 2304; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2305; GFX7-NEXT: s_mov_b32 s6, 0 2306; GFX7-NEXT: s_mov_b32 s7, 0xf000 2307; GFX7-NEXT: s_mov_b64 s[4:5], 0 2308; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 
0 addr64 2309; GFX7-NEXT: s_waitcnt vmcnt(0) 2310; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 2311; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 2312; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 2313; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 2314; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 2315; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2316; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 2317; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 2318; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2319; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2320; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2321; GFX7-NEXT: s_setpc_b64 s[30:31] 2322; 2323; GFX10-LABEL: extractelement_vgpr_v8i8_idx3: 2324; GFX10: ; %bb.0: 2325; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2326; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2327; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 2328; GFX10-NEXT: s_waitcnt vmcnt(0) 2329; GFX10-NEXT: v_mov_b32_e32 v1, 8 2330; GFX10-NEXT: v_mov_b32_e32 v2, 16 2331; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2332; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2333; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2334; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 2335; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2336; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 2337; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2338; GFX10-NEXT: s_setpc_b64 s[30:31] 2339; 2340; GFX11-LABEL: extractelement_vgpr_v8i8_idx3: 2341; GFX11: ; %bb.0: 2342; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2343; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2344; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off 2345; GFX11-NEXT: s_waitcnt vmcnt(0) 2346; GFX11-NEXT: v_bfe_u32 v1, v0, 8, 8 2347; GFX11-NEXT: v_bfe_u32 v2, v0, 16, 8 2348; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2349; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 2350; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1 2351; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 3
  ret i8 %element
}

; Constant-index extract of element 4 from an <8 x i8> loaded through an
; addrspace(1) pointer. In the expected output every target repacks the
; second loaded dword (v1) and returns it with no trailing right shift.
define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v3, 16
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, 8
; GFX10-NEXT:    v_mov_b32_e32 v2, 16
; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 4
  ret i8 %element
}

; Constant-index extract of element 5 from an <8 x i8> loaded through an
; addrspace(1) pointer. The expected output repacks the second loaded dword
; (v1) and finishes with a logical right shift by 8.
define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v2, 16
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v4
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_mov_b32 s4, 8
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, 16
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v1, 0xff, v1, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v1, v0, v2
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 5
  ret i8 %element
}

; Constant-index extract of element 6 from an <8 x i8> loaded through an
; addrspace(1) pointer. The expected output repacks the second loaded dword
; (v1) and finishes with a logical right shift by 16.
define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    s_mov_b32 s4, 16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, 8
; GFX10-NEXT:    s_mov_b32 s4, 16
; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 6
  ret i8 %element
}

; Constant-index extract of element 7 from an <8 x i8> loaded through an
; addrspace(1) pointer. The expected output repacks the second loaded dword
; (v1) and finishes with a logical right shift by 24.
define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v3, 16
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, 8
; GFX10-NEXT:    v_mov_b32_e32 v2, 16
; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 7
  ret i8 %element
}

; Dynamic-index extract from a <16 x i8> in an amdgpu_ps function where both
; the addrspace(4) pointer and the index are passed inreg. The expected
; output loads four dwords with a scalar load, repacks each one, picks the
; dword with an s_cmp_eq/s_cselect chain keyed on (idx >> 2), then shifts the
; chosen dword right by ((idx & 3) << 3) bits.
define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s10, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s5, s0, 24
; GCN-NEXT:    s_and_b32 s9, s0, 0xff
; GCN-NEXT:    s_lshl_b32 s10, s10, 8
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_or_b32 s9, s9, s10
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s9, s0
; GCN-NEXT:    s_lshl_b32 s5, s5, 24
; GCN-NEXT:    s_bfe_u32 s9, s1, 0x80008
; GCN-NEXT:    s_lshr_b32 s6, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s5
; GCN-NEXT:    s_and_b32 s5, s1, 0xff
; GCN-NEXT:    s_lshl_b32 s9, s9, 8
; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
; GCN-NEXT:    s_or_b32 s5, s5, s9
; GCN-NEXT:    s_lshl_b32 s1, s1, 16
; GCN-NEXT:    s_or_b32 s1, s5, s1
; GCN-NEXT:    s_lshl_b32 s5, s6, 24
; GCN-NEXT:    s_bfe_u32 s6, s2, 0x80008
; GCN-NEXT:    s_lshr_b32 s7, s2, 24
; GCN-NEXT:    s_or_b32 s1, s1, s5
; GCN-NEXT:    s_and_b32 s5, s2, 0xff
; GCN-NEXT:    s_lshl_b32 s6, s6, 8
; GCN-NEXT:    s_bfe_u32 s2, s2, 0x80010
; GCN-NEXT:    s_or_b32 s5, s5, s6
; GCN-NEXT:    s_lshl_b32 s2, s2, 16
; GCN-NEXT:    s_or_b32 s2, s5, s2
; GCN-NEXT:    s_lshl_b32 s5, s7, 24
; GCN-NEXT:    s_bfe_u32 s6, s3, 0x80008
; GCN-NEXT:    s_lshr_b32 s8, s3, 24
; GCN-NEXT:    s_or_b32 s2, s2, s5
; GCN-NEXT:    s_and_b32 s5, s3, 0xff
; GCN-NEXT:    s_lshl_b32 s6, s6, 8
; GCN-NEXT:    s_bfe_u32 s3, s3, 0x80010
; GCN-NEXT:    s_or_b32 s5, s5, s6
; GCN-NEXT:    s_lshl_b32 s3, s3, 16
; GCN-NEXT:    s_or_b32 s3, s5, s3
; GCN-NEXT:    s_lshl_b32 s5, s8, 24
; GCN-NEXT:    s_or_b32 s3, s3, s5
; GCN-NEXT:    s_lshr_b32 s5, s4, 2
; GCN-NEXT:    s_cmp_eq_u32 s5, 1
; GCN-NEXT:    s_cselect_b32 s0, s1, s0
; GCN-NEXT:    s_cmp_eq_u32 s5, 2
; GCN-NEXT:    s_cselect_b32 s0, s2, s0
; GCN-NEXT:    s_cmp_eq_u32 s5, 3
; GCN-NEXT:    s_cselect_b32 s0, s3, s0
; GCN-NEXT:    s_and_b32 s1, s4, 3
; GCN-NEXT:    s_lshl_b32 s1, s1, 3
; GCN-NEXT:    s_lshr_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s10, s0, 0x80008
; GFX10-NEXT:    s_bfe_u32 s12, s1, 0x80008
; GFX10-NEXT:    s_lshr_b32 s6, s1, 24
; GFX10-NEXT:    s_and_b32 s9, s0, 0xff
; GFX10-NEXT:    s_and_b32 s11, s1, 0xff
; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x80010
; GFX10-NEXT:    s_lshl_b32 s10, s10, 8
; GFX10-NEXT:    s_lshl_b32 s12, s12, 8
; GFX10-NEXT:    s_lshr_b32 s5, s0, 24
; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
; GFX10-NEXT:    s_or_b32 s9, s9, s10
; GFX10-NEXT:    s_or_b32 s10, s11, s12
; GFX10-NEXT:    s_bfe_u32 s14, s2, 0x80008
; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
; GFX10-NEXT:    s_lshl_b32 s6, s6, 24
; GFX10-NEXT:    s_or_b32 s1, s10, s1
; GFX10-NEXT:    s_lshr_b32 s7, s2, 24
; GFX10-NEXT:    s_and_b32 s13, s2, 0xff
; GFX10-NEXT:    s_bfe_u32 s2, s2, 0x80010
; GFX10-NEXT:    s_lshl_b32 s5, s5, 24
; GFX10-NEXT:    s_lshl_b32 s14, s14, 8
; GFX10-NEXT:    s_or_b32 s0, s9, s0
; GFX10-NEXT:    s_or_b32 s1, s1, s6
; GFX10-NEXT:    s_bfe_u32 s6, s3, 0x80008
; GFX10-NEXT:    s_lshr_b32 s8, s3, 24
; GFX10-NEXT:    s_lshl_b32 s2, s2, 16
; GFX10-NEXT:    s_or_b32 s11, s13, s14
; GFX10-NEXT:    s_or_b32 s0, s0, s5
; GFX10-NEXT:    s_lshl_b32 s5, s7, 24
; GFX10-NEXT:    s_and_b32 s7, s3, 0xff
; GFX10-NEXT:    s_lshl_b32 s6, s6, 8
; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x80010
; GFX10-NEXT:    s_or_b32 s2, s11, s2
; GFX10-NEXT:    s_or_b32 s6, s7, s6
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s2, s2, s5
; GFX10-NEXT:    s_or_b32 s3, s6, s3
; GFX10-NEXT:    s_lshl_b32 s5, s8, 24
; GFX10-NEXT:    s_lshr_b32 s6, s4, 2
; GFX10-NEXT:    s_or_b32 s3, s3, s5
; GFX10-NEXT:    s_cmp_eq_u32 s6, 1
; GFX10-NEXT:    s_cselect_b32 s0, s1, s0
; GFX10-NEXT:    s_cmp_eq_u32 s6, 2
; GFX10-NEXT:    s_cselect_b32 s0, s2, s0
; GFX10-NEXT:    s_cmp_eq_u32 s6, 3
; GFX10-NEXT:    s_cselect_b32 s0, s3, s0
; GFX10-NEXT:    s_and_b32 s1, s4, 3
; GFX10-NEXT:    s_lshl_b32 s1, s1, 3
; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_bfe_u32 s10, s0, 0x80008
; GFX11-NEXT:    s_bfe_u32 s12, s1, 0x80008
; GFX11-NEXT:    s_lshr_b32 s6, s1, 24
; GFX11-NEXT:    s_and_b32 s9, s0, 0xff
; GFX11-NEXT:    s_and_b32 s11, s1, 0xff
; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x80010
; GFX11-NEXT:    s_lshl_b32 s10, s10, 8
; GFX11-NEXT:    s_lshl_b32 s12, s12, 8
; GFX11-NEXT:    s_lshr_b32 s5, s0, 24
; GFX11-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX11-NEXT:    s_lshl_b32 s1, s1, 16
; GFX11-NEXT:    s_or_b32 s9, s9, s10
; GFX11-NEXT:    s_or_b32 s10, s11, s12
; GFX11-NEXT:    s_bfe_u32 s14, s2, 0x80008
; GFX11-NEXT:    s_lshl_b32 s0, s0, 16
; GFX11-NEXT:    s_lshl_b32 s6, s6, 24
; GFX11-NEXT:    s_or_b32 s1, s10, s1
; GFX11-NEXT:    s_lshr_b32 s7, s2, 24
; GFX11-NEXT:    s_and_b32 s13, s2, 0xff
; GFX11-NEXT:    s_bfe_u32 s2, s2, 0x80010
; GFX11-NEXT:    s_lshl_b32 s5, s5, 24
; GFX11-NEXT:    s_lshl_b32 s14, s14, 8
; GFX11-NEXT:    s_or_b32 s0, s9, s0
; GFX11-NEXT:    s_or_b32 s1, s1, s6
; GFX11-NEXT:    s_bfe_u32 s6, s3, 0x80008
; GFX11-NEXT:    s_lshr_b32 s8, s3, 24
; GFX11-NEXT:    s_lshl_b32 s2, s2, 16
; GFX11-NEXT:    s_or_b32 s11, s13, s14
; GFX11-NEXT:    s_or_b32 s0, s0, s5
; GFX11-NEXT:    s_lshl_b32 s5, s7, 24
; GFX11-NEXT:    s_and_b32 s7, s3, 0xff
; GFX11-NEXT:    s_lshl_b32 s6, s6, 8
; GFX11-NEXT:    s_bfe_u32 s3, s3, 0x80010
; GFX11-NEXT:    s_or_b32 s2, s11, s2
; GFX11-NEXT:    s_or_b32 s6, s7, s6
; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
; GFX11-NEXT:    s_or_b32 s2, s2, s5
; GFX11-NEXT:    s_or_b32 s3, s6, s3
; GFX11-NEXT:    s_lshl_b32 s5, s8, 24
; GFX11-NEXT:    s_lshr_b32 s6, s4, 2
; GFX11-NEXT:    s_or_b32 s3, s3, s5
; GFX11-NEXT:    s_cmp_eq_u32 s6, 1
; GFX11-NEXT:    s_cselect_b32 s0, s1, s0
; GFX11-NEXT:    s_cmp_eq_u32 s6, 2
; GFX11-NEXT:    s_cselect_b32 s0, s2, s0
; GFX11-NEXT:    s_cmp_eq_u32 s6, 3
; GFX11-NEXT:    s_cselect_b32 s0, s3, s0
; GFX11-NEXT:    s_and_b32 s1, s4, 3
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_lshl_b32 s1, s1, 3
; GFX11-NEXT:    s_lshr_b32 s0, s0, s1
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr
  %element = extractelement <16 x i8> %vector, i32 %idx
  ret i8 %element
}

define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
; GFX9-LABEL: 
extractelement_vgpr_v16i8_sgpr_idx: 2923; GFX9: ; %bb.0: 2924; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2925; GFX9-NEXT: s_mov_b32 s0, 8 2926; GFX9-NEXT: s_mov_b32 s1, 16 2927; GFX9-NEXT: s_movk_i32 s3, 0xff 2928; GFX9-NEXT: v_mov_b32_e32 v5, 8 2929; GFX9-NEXT: v_mov_b32_e32 v4, 0xff 2930; GFX9-NEXT: v_mov_b32_e32 v6, 16 2931; GFX9-NEXT: s_lshr_b32 s4, s2, 2 2932; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 2933; GFX9-NEXT: s_and_b32 s2, s2, 3 2934; GFX9-NEXT: s_waitcnt vmcnt(0) 2935; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v0 2936; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v1 2937; GFX9-NEXT: v_lshlrev_b32_sdwa v11, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2938; GFX9-NEXT: v_lshlrev_b32_sdwa v13, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2939; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v2 2940; GFX9-NEXT: v_lshlrev_b32_sdwa v12, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2941; GFX9-NEXT: v_lshlrev_b32_sdwa v14, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2942; GFX9-NEXT: v_lshlrev_b32_sdwa v15, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2943; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v11 2944; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 2945; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v13 2946; GFX9-NEXT: v_lshlrev_b32_e32 v8, 24, v8 2947; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v3 2948; GFX9-NEXT: v_lshlrev_b32_sdwa v16, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2949; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2950; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v15 2951; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 2952; GFX9-NEXT: v_or3_b32 v0, v0, v12, v7 2953; GFX9-NEXT: v_or3_b32 v1, v1, v14, v8 2954; GFX9-NEXT: v_lshlrev_b32_sdwa v6, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2955; GFX9-NEXT: v_and_or_b32 v3, v3, v4, v5 
2956; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v10 2957; GFX9-NEXT: v_or3_b32 v2, v2, v16, v9 2958; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2959; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 2 2960; GFX9-NEXT: v_or3_b32 v3, v3, v6, v4 2961; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2962; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 3 2963; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2964; GFX9-NEXT: s_lshl_b32 s0, s2, 3 2965; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 2966; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2967; GFX9-NEXT: ; return to shader part epilog 2968; 2969; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx: 2970; GFX8: ; %bb.0: 2971; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2972; GFX8-NEXT: v_mov_b32_e32 v4, 8 2973; GFX8-NEXT: v_mov_b32_e32 v5, 16 2974; GFX8-NEXT: v_mov_b32_e32 v6, 8 2975; GFX8-NEXT: v_mov_b32_e32 v7, 16 2976; GFX8-NEXT: s_lshr_b32 s0, s2, 2 2977; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 2978; GFX8-NEXT: s_and_b32 s1, s2, 3 2979; GFX8-NEXT: s_waitcnt vmcnt(0) 2980; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2981; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2982; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v0 2983; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v1 2984; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2985; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2986; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2987; GFX8-NEXT: v_or_b32_sdwa v0, v0, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2988; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2989; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v2 2990; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v7, v2 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2991; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2992; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v8 2993; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v9 2994; GFX8-NEXT: v_or_b32_sdwa v2, v2, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2995; GFX8-NEXT: v_or_b32_e32 v0, v0, v13 2996; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 2997; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v3 2998; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2999; GFX8-NEXT: v_lshlrev_b32_e32 v9, 24, v10 3000; GFX8-NEXT: v_or_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3001; GFX8-NEXT: v_or_b32_e32 v2, v2, v15 3002; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 3003; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 3004; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v11 3005; GFX8-NEXT: v_or_b32_e32 v3, v3, v7 3006; GFX8-NEXT: v_or_b32_e32 v2, v2, v9 3007; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3008; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2 3009; GFX8-NEXT: v_or_b32_e32 v3, v3, v6 3010; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3011; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3 3012; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3013; GFX8-NEXT: s_lshl_b32 s0, s1, 3 3014; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 3015; GFX8-NEXT: v_readfirstlane_b32 s0, v0 3016; GFX8-NEXT: ; return to shader part epilog 3017; 3018; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx: 3019; GFX7: ; %bb.0: 3020; GFX7-NEXT: s_mov_b32 s6, 0 3021; GFX7-NEXT: s_mov_b32 s7, 0xf000 3022; GFX7-NEXT: s_mov_b64 s[4:5], 0 3023; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3024; GFX7-NEXT: s_lshr_b32 s0, s2, 2 3025; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 3026; GFX7-NEXT: s_and_b32 s1, s2, 3 3027; GFX7-NEXT: s_waitcnt vmcnt(0) 3028; GFX7-NEXT: v_bfe_u32 v9, v0, 8, 8 3029; GFX7-NEXT: v_bfe_u32 v11, v1, 8, 8 3030; GFX7-NEXT: 
v_lshrrev_b32_e32 v4, 24, v0 3031; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v1 3032; GFX7-NEXT: v_and_b32_e32 v8, 0xff, v0 3033; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 3034; GFX7-NEXT: v_and_b32_e32 v10, 0xff, v1 3035; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 3036; GFX7-NEXT: v_bfe_u32 v13, v2, 8, 8 3037; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 3038; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 3039; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v2 3040; GFX7-NEXT: v_and_b32_e32 v12, 0xff, v2 3041; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 3042; GFX7-NEXT: v_bfe_u32 v15, v3, 8, 8 3043; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3044; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3045; GFX7-NEXT: v_lshlrev_b32_e32 v13, 8, v13 3046; GFX7-NEXT: v_or_b32_e32 v8, v8, v9 3047; GFX7-NEXT: v_or_b32_e32 v9, v10, v11 3048; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v3 3049; GFX7-NEXT: v_and_b32_e32 v14, 0xff, v3 3050; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 3051; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 3052; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 3053; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3054; GFX7-NEXT: v_lshlrev_b32_e32 v15, 8, v15 3055; GFX7-NEXT: v_or_b32_e32 v10, v12, v13 3056; GFX7-NEXT: v_or_b32_e32 v0, v8, v0 3057; GFX7-NEXT: v_or_b32_e32 v1, v9, v1 3058; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 3059; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3060; GFX7-NEXT: v_or_b32_e32 v11, v14, v15 3061; GFX7-NEXT: v_or_b32_e32 v2, v10, v2 3062; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 3063; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 3064; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 3065; GFX7-NEXT: v_or_b32_e32 v3, v11, v3 3066; GFX7-NEXT: v_or_b32_e32 v2, v2, v6 3067; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3068; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2 3069; GFX7-NEXT: v_or_b32_e32 v3, v3, v7 3070; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3071; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3 3072; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3073; GFX7-NEXT: s_lshl_b32 s0, s1, 3 3074; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 3075; GFX7-NEXT: 
v_readfirstlane_b32 s0, v0 3076; GFX7-NEXT: ; return to shader part epilog 3077; 3078; GFX10-LABEL: extractelement_vgpr_v16i8_sgpr_idx: 3079; GFX10: ; %bb.0: 3080; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3081; GFX10-NEXT: s_mov_b32 s0, 8 3082; GFX10-NEXT: v_mov_b32_e32 v4, 8 3083; GFX10-NEXT: s_mov_b32 s1, 16 3084; GFX10-NEXT: v_mov_b32_e32 v5, 16 3085; GFX10-NEXT: s_waitcnt vmcnt(0) 3086; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0 3087; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 3088; GFX10-NEXT: v_lshlrev_b32_sdwa v9, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3089; GFX10-NEXT: v_lshlrev_b32_sdwa v11, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3090; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v2 3091; GFX10-NEXT: v_lshlrev_b32_sdwa v10, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3092; GFX10-NEXT: v_lshlrev_b32_sdwa v12, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3093; GFX10-NEXT: v_lshlrev_b32_sdwa v13, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3094; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v9 3095; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 3096; GFX10-NEXT: v_and_or_b32 v1, v1, 0xff, v11 3097; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 3098; GFX10-NEXT: s_lshr_b32 s0, s2, 2 3099; GFX10-NEXT: v_lshlrev_b32_sdwa v14, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3100; GFX10-NEXT: v_and_or_b32 v2, 0xff, v2, v13 3101; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 3102; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3103; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v3 3104; GFX10-NEXT: v_or3_b32 v0, v0, v10, v6 3105; GFX10-NEXT: v_or3_b32 v1, v1, v12, v7 3106; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1 3107; GFX10-NEXT: v_or3_b32 v2, v2, v14, v8 3108; GFX10-NEXT: v_and_or_b32 v4, 0xff, v3, v4 3109; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v5, v3 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3110; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v9 3111; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3112; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 2 3113; GFX10-NEXT: v_or3_b32 v1, v4, v3, v5 3114; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3115; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 3 3116; GFX10-NEXT: s_and_b32 s0, s2, 3 3117; GFX10-NEXT: s_lshl_b32 s0, s0, 3 3118; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3119; GFX10-NEXT: v_lshrrev_b32_e32 v0, s0, v0 3120; GFX10-NEXT: v_readfirstlane_b32 s0, v0 3121; GFX10-NEXT: ; return to shader part epilog 3122; 3123; GFX11-LABEL: extractelement_vgpr_v16i8_sgpr_idx: 3124; GFX11: ; %bb.0: 3125; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 3126; GFX11-NEXT: s_lshr_b32 s0, s2, 2 3127; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3128; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1 3129; GFX11-NEXT: s_waitcnt vmcnt(0) 3130; GFX11-NEXT: v_bfe_u32 v12, v2, 8, 8 3131; GFX11-NEXT: v_lshrrev_b32_e32 v6, 24, v2 3132; GFX11-NEXT: v_bfe_u32 v8, v0, 8, 8 3133; GFX11-NEXT: v_bfe_u32 v13, v2, 16, 8 3134; GFX11-NEXT: v_lshrrev_b32_e32 v4, 24, v0 3135; GFX11-NEXT: v_lshlrev_b32_e32 v12, 8, v12 3136; GFX11-NEXT: v_bfe_u32 v9, v0, 16, 8 3137; GFX11-NEXT: v_bfe_u32 v10, v1, 8, 8 3138; GFX11-NEXT: v_lshlrev_b32_e32 v8, 8, v8 3139; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v13 3140; GFX11-NEXT: v_lshlrev_b32_e32 v6, 24, v6 3141; GFX11-NEXT: v_and_or_b32 v2, 0xff, v2, v12 3142; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v1 3143; GFX11-NEXT: v_bfe_u32 v11, v1, 16, 8 3144; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4 3145; GFX11-NEXT: v_lshlrev_b32_e32 v10, 8, v10 3146; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v8 3147; GFX11-NEXT: v_or3_b32 v2, v2, v13, v6 3148; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9 3149; GFX11-NEXT: v_lshlrev_b32_e32 v5, 24, v5 3150; GFX11-NEXT: v_and_or_b32 v1, v1, 0xff, v10 3151; GFX11-NEXT: v_bfe_u32 v14, v3, 8, 8 3152; GFX11-NEXT: v_lshrrev_b32_e32 v7, 
24, v3 3153; GFX11-NEXT: v_or3_b32 v0, v0, v9, v4 3154; GFX11-NEXT: v_lshlrev_b32_e32 v11, 16, v11 3155; GFX11-NEXT: v_bfe_u32 v10, v3, 16, 8 3156; GFX11-NEXT: v_lshlrev_b32_e32 v8, 8, v14 3157; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3158; GFX11-NEXT: v_or3_b32 v1, v1, v11, v5 3159; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v10 3160; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3161; GFX11-NEXT: v_and_or_b32 v3, 0xff, v3, v8 3162; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3163; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 2 3164; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) 3165; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3166; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 3 3167; GFX11-NEXT: v_lshlrev_b32_e32 v5, 24, v7 3168; GFX11-NEXT: s_and_b32 s0, s2, 3 3169; GFX11-NEXT: s_lshl_b32 s0, s0, 3 3170; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3171; GFX11-NEXT: v_or3_b32 v1, v3, v4, v5 3172; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3173; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3174; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 3175; GFX11-NEXT: v_readfirstlane_b32 s0, v0 3176; GFX11-NEXT: ; return to shader part epilog 3177 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3178 %element = extractelement <16 x i8> %vector, i32 %idx 3179 ret i8 %element 3180} 3181; Dynamic index with the default calling convention. The checks below expect the pointer in v[0,1] and the index in v2, one 128-bit load, repacking of the bytes of every loaded dword, a v_cndmask chain that selects the dword by (index >> 2), and a final right shift by (index & 3) * 8. The assertions are autogenerated, so regenerate them with utils/update_llc_test_checks.py instead of editing them by hand. 3182define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 %idx) { 3183; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 3184; GFX9: ; %bb.0: 3185; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3186; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off 3187; GFX9-NEXT: s_mov_b32 s4, 8 3188; GFX9-NEXT: s_mov_b32 s5, 16 3189; GFX9-NEXT: s_movk_i32 s6, 0xff 3190; GFX9-NEXT: v_mov_b32_e32 v1, 8 3191; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3192; GFX9-NEXT: v_mov_b32_e32 v7, 16
3193; GFX9-NEXT: v_lshrrev_b32_e32 v8, 2, v2 3194; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 3195; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 3196; GFX9-NEXT: s_waitcnt vmcnt(0) 3197; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v3 3198; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v4 3199; GFX9-NEXT: v_lshlrev_b32_sdwa v13, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3200; GFX9-NEXT: v_lshlrev_b32_sdwa v15, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3201; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v5 3202; GFX9-NEXT: v_lshrrev_b32_e32 v12, 24, v6 3203; GFX9-NEXT: v_lshlrev_b32_sdwa v14, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3204; GFX9-NEXT: v_lshlrev_b32_sdwa v16, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3205; GFX9-NEXT: v_lshlrev_b32_sdwa v17, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3206; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3207; GFX9-NEXT: v_and_or_b32 v3, v3, s6, v13 3208; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 3209; GFX9-NEXT: v_and_or_b32 v4, v4, s6, v15 3210; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 3211; GFX9-NEXT: v_lshlrev_b32_sdwa v18, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3212; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3213; GFX9-NEXT: v_and_or_b32 v5, v5, v0, v17 3214; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v11 3215; GFX9-NEXT: v_and_or_b32 v0, v6, v0, v1 3216; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v12 3217; GFX9-NEXT: v_or3_b32 v3, v3, v14, v9 3218; GFX9-NEXT: v_or3_b32 v4, v4, v16, v10 3219; GFX9-NEXT: v_or3_b32 v5, v5, v18, v11 3220; GFX9-NEXT: v_or3_b32 v0, v0, v7, v1 3221; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc 3222; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 3223; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 3224; GFX9-NEXT: v_cmp_eq_u32_e32 
vcc, 3, v8 3225; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3226; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2 3227; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 3228; GFX9-NEXT: s_setpc_b64 s[30:31] 3229; 3230; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 3231; GFX8: ; %bb.0: 3232; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3233; GFX8-NEXT: flat_load_dwordx4 v[3:6], v[0:1] 3234; GFX8-NEXT: v_mov_b32_e32 v0, 8 3235; GFX8-NEXT: v_mov_b32_e32 v1, 16 3236; GFX8-NEXT: v_mov_b32_e32 v7, 8 3237; GFX8-NEXT: v_mov_b32_e32 v8, 16 3238; GFX8-NEXT: v_lshrrev_b32_e32 v9, 2, v2 3239; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9 3240; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 3241; GFX8-NEXT: s_waitcnt vmcnt(0) 3242; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3243; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3244; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v3 3245; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v4 3246; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3247; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3248; GFX8-NEXT: v_lshlrev_b32_sdwa v16, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3249; GFX8-NEXT: v_or_b32_sdwa v3, v3, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3250; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3251; GFX8-NEXT: v_lshrrev_b32_e32 v12, 24, v5 3252; GFX8-NEXT: v_lshlrev_b32_sdwa v17, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3253; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3254; GFX8-NEXT: v_lshlrev_b32_e32 v10, 24, v10 3255; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v11 3256; GFX8-NEXT: v_or_b32_sdwa v5, v5, v16 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3257; GFX8-NEXT: v_or_b32_e32 v3, v3, v15 3258; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3259; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v6 3260; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3261; GFX8-NEXT: v_lshlrev_b32_e32 v11, 24, v12 3262; GFX8-NEXT: v_or_b32_sdwa v6, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3263; GFX8-NEXT: v_or_b32_e32 v1, v5, v17 3264; GFX8-NEXT: v_or_b32_e32 v3, v3, v10 3265; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 3266; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v13 3267; GFX8-NEXT: v_or_b32_e32 v5, v6, v8 3268; GFX8-NEXT: v_or_b32_e32 v1, v1, v11 3269; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc 3270; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9 3271; GFX8-NEXT: v_or_b32_e32 v4, v5, v7 3272; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3273; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9 3274; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3275; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2 3276; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 3277; GFX8-NEXT: s_setpc_b64 s[30:31] 3278; 3279; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 3280; GFX7: ; %bb.0: 3281; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3282; GFX7-NEXT: s_mov_b32 s6, 0 3283; GFX7-NEXT: s_mov_b32 s7, 0xf000 3284; GFX7-NEXT: s_mov_b64 s[4:5], 0 3285; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64 3286; GFX7-NEXT: v_lshrrev_b32_e32 v17, 2, v2 3287; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v17 3288; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 3289; GFX7-NEXT: s_waitcnt vmcnt(0) 3290; GFX7-NEXT: v_bfe_u32 v10, v3, 8, 8 3291; GFX7-NEXT: v_bfe_u32 v12, v4, 8, 8 3292; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 3293; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v4 3294; GFX7-NEXT: v_and_b32_e32 v9, 0xff, v3 3295; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 3296; GFX7-NEXT: v_and_b32_e32 v11, 0xff, v4 3297; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8 3298; GFX7-NEXT: v_bfe_u32 v14, 
v5, 8, 8 3299; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v10 3300; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v12 3301; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v5 3302; GFX7-NEXT: v_and_b32_e32 v13, 0xff, v5 3303; GFX7-NEXT: v_bfe_u32 v5, v5, 16, 8 3304; GFX7-NEXT: v_bfe_u32 v16, v6, 8, 8 3305; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3306; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 3307; GFX7-NEXT: v_lshlrev_b32_e32 v14, 8, v14 3308; GFX7-NEXT: v_or_b32_e32 v9, v9, v10 3309; GFX7-NEXT: v_or_b32_e32 v10, v11, v12 3310; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v6 3311; GFX7-NEXT: v_and_b32_e32 v15, 0xff, v6 3312; GFX7-NEXT: v_bfe_u32 v6, v6, 16, 8 3313; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3314; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 3315; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 3316; GFX7-NEXT: v_lshlrev_b32_e32 v16, 8, v16 3317; GFX7-NEXT: v_or_b32_e32 v11, v13, v14 3318; GFX7-NEXT: v_or_b32_e32 v3, v9, v3 3319; GFX7-NEXT: v_or_b32_e32 v4, v10, v4 3320; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 3321; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 3322; GFX7-NEXT: v_or_b32_e32 v12, v15, v16 3323; GFX7-NEXT: v_or_b32_e32 v5, v11, v5 3324; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 3325; GFX7-NEXT: v_or_b32_e32 v1, v4, v1 3326; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 3327; GFX7-NEXT: v_or_b32_e32 v6, v12, v6 3328; GFX7-NEXT: v_or_b32_e32 v3, v5, v7 3329; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3330; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v17 3331; GFX7-NEXT: v_or_b32_e32 v4, v6, v8 3332; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3333; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v17 3334; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3335; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v2 3336; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 3337; GFX7-NEXT: s_setpc_b64 s[30:31] 3338; 3339; GFX10-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 3340; GFX10: ; %bb.0: 3341; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3342; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3343; GFX10-NEXT: global_load_dwordx4 v[3:6], v[0:1], off 3344; 
GFX10-NEXT: s_mov_b32 s4, 8 3345; GFX10-NEXT: v_mov_b32_e32 v0, 8 3346; GFX10-NEXT: s_mov_b32 s5, 16 3347; GFX10-NEXT: v_mov_b32_e32 v1, 16 3348; GFX10-NEXT: v_lshrrev_b32_e32 v7, 2, v2 3349; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 3350; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7 3351; GFX10-NEXT: s_waitcnt vmcnt(0) 3352; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v3 3353; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v4 3354; GFX10-NEXT: v_lshlrev_b32_sdwa v12, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3355; GFX10-NEXT: v_lshlrev_b32_sdwa v14, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3356; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v5 3357; GFX10-NEXT: v_lshlrev_b32_sdwa v13, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3358; GFX10-NEXT: v_lshlrev_b32_sdwa v15, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3359; GFX10-NEXT: v_lshlrev_b32_sdwa v16, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3360; GFX10-NEXT: v_and_or_b32 v3, v3, 0xff, v12 3361; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 3362; GFX10-NEXT: v_and_or_b32 v4, v4, 0xff, v14 3363; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 3364; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v6 3365; GFX10-NEXT: v_lshlrev_b32_sdwa v17, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3366; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3367; GFX10-NEXT: v_and_or_b32 v5, 0xff, v5, v16 3368; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 3369; GFX10-NEXT: v_or3_b32 v3, v3, v13, v8 3370; GFX10-NEXT: v_or3_b32 v4, v4, v15, v9 3371; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3372; GFX10-NEXT: v_and_or_b32 v0, 0xff, v6, v0 3373; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v11 3374; GFX10-NEXT: v_or3_b32 v5, v5, v17, v10 3375; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v4, 
vcc_lo 3376; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v7 3377; GFX10-NEXT: v_or3_b32 v0, v0, v1, v6 3378; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo 3379; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v7 3380; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo 3381; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v2 3382; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0 3383; GFX10-NEXT: s_setpc_b64 s[30:31] 3384; 3385; GFX11-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 3386; GFX11: ; %bb.0: 3387; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3388; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3389; GFX11-NEXT: global_load_b128 v[3:6], v[0:1], off 3390; GFX11-NEXT: v_lshrrev_b32_e32 v0, 2, v2 3391; GFX11-NEXT: v_and_b32_e32 v2, 3, v2 3392; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 3393; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3394; GFX11-NEXT: s_waitcnt vmcnt(0) 3395; GFX11-NEXT: v_bfe_u32 v14, v5, 8, 8 3396; GFX11-NEXT: v_lshrrev_b32_e32 v8, 24, v5 3397; GFX11-NEXT: v_bfe_u32 v15, v5, 16, 8 3398; GFX11-NEXT: v_bfe_u32 v10, v3, 8, 8 3399; GFX11-NEXT: v_bfe_u32 v12, v4, 8, 8 3400; GFX11-NEXT: v_lshlrev_b32_e32 v14, 8, v14 3401; GFX11-NEXT: v_lshlrev_b32_e32 v8, 24, v8 3402; GFX11-NEXT: v_lshlrev_b32_e32 v15, 16, v15 3403; GFX11-NEXT: v_lshrrev_b32_e32 v1, 24, v3 3404; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v4 3405; GFX11-NEXT: v_and_or_b32 v5, 0xff, v5, v14 3406; GFX11-NEXT: v_bfe_u32 v11, v3, 16, 8 3407; GFX11-NEXT: v_bfe_u32 v13, v4, 16, 8 3408; GFX11-NEXT: v_lshlrev_b32_e32 v10, 8, v10 3409; GFX11-NEXT: v_bfe_u32 v16, v6, 8, 8 3410; GFX11-NEXT: v_or3_b32 v5, v5, v15, v8 3411; GFX11-NEXT: v_lshlrev_b32_e32 v12, 8, v12 3412; GFX11-NEXT: v_lshlrev_b32_e32 v11, 16, v11 3413; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v1 3414; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v13 3415; GFX11-NEXT: v_lshlrev_b32_e32 v7, 24, v7 3416; GFX11-NEXT: v_and_or_b32 v3, v3, 0xff, v10 3417; GFX11-NEXT: v_and_or_b32 v4, v4, 0xff, v12 3418; GFX11-NEXT: v_lshrrev_b32_e32 v9, 24, v6 3419; GFX11-NEXT: v_bfe_u32 v17, v6, 
16, 8 3420; GFX11-NEXT: v_lshlrev_b32_e32 v10, 8, v16 3421; GFX11-NEXT: v_or3_b32 v1, v3, v11, v1 3422; GFX11-NEXT: v_or3_b32 v3, v4, v13, v7 3423; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v9 3424; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v17 3425; GFX11-NEXT: v_and_or_b32 v6, 0xff, v6, v10 3426; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) 3427; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 3428; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 3429; GFX11-NEXT: v_or3_b32 v3, v6, v12, v4 3430; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) 3431; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 3432; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 3433; GFX11-NEXT: v_dual_cndmask_b32 v0, v1, v3 :: v_dual_lshlrev_b32 v1, 3, v2 3434; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3435; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 3436; GFX11-NEXT: s_setpc_b64 s[30:31] 3437 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3438 %element = extractelement <16 x i8> %vector, i32 %idx 3439 ret i8 %element 3440} 3441; Dynamic VGPR index into a vector loaded through an inreg addrspace(4) pointer in an amdgpu_ps function. The checks below expect a scalar load of all four dwords, SALU repacking of the bytes of each dword, a v_cndmask chain that selects the dword by (index >> 2), a right shift by (index & 3) * 8, and v_readfirstlane_b32 to return the result in s0. The gfx900, fiji and hawaii runs share the common GCN check prefix for this function. The assertions are autogenerated, so regenerate them with utils/update_llc_test_checks.py instead of editing them by hand. 3442define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 %idx) { 3443; GCN-LABEL: extractelement_sgpr_v16i8_vgpr_idx: 3444; GCN: ; %bb.0: 3445; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 3446; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0 3447; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 3448; GCN-NEXT: v_and_b32_e32 v0, 3, v0 3449; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 3450; GCN-NEXT: s_waitcnt lgkmcnt(0) 3451; GCN-NEXT: s_bfe_u32 s9, s0, 0x80008 3452; GCN-NEXT: s_lshr_b32 s4, s0, 24 3453; GCN-NEXT: s_and_b32 s8, s0, 0xff 3454; GCN-NEXT: s_lshl_b32 s9, s9, 8 3455; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 3456; GCN-NEXT: s_or_b32 s8, s8, s9 3457; GCN-NEXT: s_lshl_b32 s0, s0, 16 3458; GCN-NEXT: s_or_b32 s0, s8, s0 3459; GCN-NEXT: s_lshl_b32 s4, s4, 24 3460; GCN-NEXT: s_bfe_u32 s8, s1, 0x80008 3461; GCN-NEXT: s_lshr_b32 s5, s1, 24 3462; GCN-NEXT: s_or_b32 s0, s0, s4 3463;
GCN-NEXT: s_and_b32 s4, s1, 0xff 3464; GCN-NEXT: s_lshl_b32 s8, s8, 8 3465; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 3466; GCN-NEXT: s_or_b32 s4, s4, s8 3467; GCN-NEXT: s_lshl_b32 s1, s1, 16 3468; GCN-NEXT: s_or_b32 s1, s4, s1 3469; GCN-NEXT: s_lshl_b32 s4, s5, 24 3470; GCN-NEXT: s_bfe_u32 s5, s2, 0x80008 3471; GCN-NEXT: s_lshr_b32 s6, s2, 24 3472; GCN-NEXT: s_or_b32 s1, s1, s4 3473; GCN-NEXT: s_and_b32 s4, s2, 0xff 3474; GCN-NEXT: s_lshl_b32 s5, s5, 8 3475; GCN-NEXT: s_bfe_u32 s2, s2, 0x80010 3476; GCN-NEXT: s_or_b32 s4, s4, s5 3477; GCN-NEXT: s_lshl_b32 s2, s2, 16 3478; GCN-NEXT: s_or_b32 s2, s4, s2 3479; GCN-NEXT: s_lshl_b32 s4, s6, 24 3480; GCN-NEXT: s_bfe_u32 s5, s3, 0x80008 3481; GCN-NEXT: s_lshr_b32 s7, s3, 24 3482; GCN-NEXT: s_or_b32 s2, s2, s4 3483; GCN-NEXT: s_and_b32 s4, s3, 0xff 3484; GCN-NEXT: s_lshl_b32 s5, s5, 8 3485; GCN-NEXT: s_bfe_u32 s3, s3, 0x80010 3486; GCN-NEXT: s_or_b32 s4, s4, s5 3487; GCN-NEXT: s_lshl_b32 s3, s3, 16 3488; GCN-NEXT: s_or_b32 s3, s4, s3 3489; GCN-NEXT: s_lshl_b32 s4, s7, 24 3490; GCN-NEXT: v_mov_b32_e32 v2, s0 3491; GCN-NEXT: v_mov_b32_e32 v3, s1 3492; GCN-NEXT: s_or_b32 s3, s3, s4 3493; GCN-NEXT: v_mov_b32_e32 v4, s2 3494; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 3495; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 3496; GCN-NEXT: v_mov_b32_e32 v5, s3 3497; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 3498; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 3499; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc 3500; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1 3501; GCN-NEXT: v_readfirstlane_b32 s0, v0 3502; GCN-NEXT: ; return to shader part epilog 3503; 3504; GFX10-LABEL: extractelement_sgpr_v16i8_vgpr_idx: 3505; GFX10: ; %bb.0: 3506; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 3507; GFX10-NEXT: v_lshrrev_b32_e32 v1, 2, v0 3508; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 3509; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 3510; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 3511; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3512; GFX10-NEXT: s_bfe_u32 s9, s0, 0x80008 3513; 
GFX10-NEXT: s_bfe_u32 s11, s1, 0x80008 3514; GFX10-NEXT: s_lshr_b32 s5, s1, 24 3515; GFX10-NEXT: s_and_b32 s8, s0, 0xff 3516; GFX10-NEXT: s_and_b32 s10, s1, 0xff 3517; GFX10-NEXT: s_bfe_u32 s1, s1, 0x80010 3518; GFX10-NEXT: s_lshl_b32 s9, s9, 8 3519; GFX10-NEXT: s_lshl_b32 s11, s11, 8 3520; GFX10-NEXT: s_lshl_b32 s1, s1, 16 3521; GFX10-NEXT: s_or_b32 s8, s8, s9 3522; GFX10-NEXT: s_or_b32 s9, s10, s11 3523; GFX10-NEXT: s_lshl_b32 s5, s5, 24 3524; GFX10-NEXT: s_or_b32 s1, s9, s1 3525; GFX10-NEXT: s_lshr_b32 s4, s0, 24 3526; GFX10-NEXT: s_bfe_u32 s0, s0, 0x80010 3527; GFX10-NEXT: s_or_b32 s1, s1, s5 3528; GFX10-NEXT: s_lshl_b32 s0, s0, 16 3529; GFX10-NEXT: s_bfe_u32 s13, s2, 0x80008 3530; GFX10-NEXT: v_mov_b32_e32 v2, s1 3531; GFX10-NEXT: s_lshl_b32 s4, s4, 24 3532; GFX10-NEXT: s_or_b32 s0, s8, s0 3533; GFX10-NEXT: s_lshr_b32 s6, s2, 24 3534; GFX10-NEXT: s_and_b32 s12, s2, 0xff 3535; GFX10-NEXT: s_bfe_u32 s2, s2, 0x80010 3536; GFX10-NEXT: s_lshl_b32 s13, s13, 8 3537; GFX10-NEXT: s_or_b32 s0, s0, s4 3538; GFX10-NEXT: s_lshl_b32 s2, s2, 16 3539; GFX10-NEXT: s_or_b32 s10, s12, s13 3540; GFX10-NEXT: s_bfe_u32 s5, s3, 0x80008 3541; GFX10-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo 3542; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 3543; GFX10-NEXT: s_or_b32 s2, s10, s2 3544; GFX10-NEXT: s_lshl_b32 s4, s6, 24 3545; GFX10-NEXT: s_and_b32 s6, s3, 0xff 3546; GFX10-NEXT: s_lshl_b32 s5, s5, 8 3547; GFX10-NEXT: s_bfe_u32 s1, s3, 0x80010 3548; GFX10-NEXT: s_or_b32 s2, s2, s4 3549; GFX10-NEXT: s_lshr_b32 s7, s3, 24 3550; GFX10-NEXT: s_or_b32 s3, s6, s5 3551; GFX10-NEXT: s_lshl_b32 s1, s1, 16 3552; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo 3553; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 3554; GFX10-NEXT: s_or_b32 s0, s3, s1 3555; GFX10-NEXT: s_lshl_b32 s1, s7, 24 3556; GFX10-NEXT: s_or_b32 s3, s0, s1 3557; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo 3558; GFX10-NEXT: v_lshrrev_b32_e32 v0, v0, v1 3559; GFX10-NEXT: v_readfirstlane_b32 s0, v0 3560; GFX10-NEXT: ; return to 
shader part epilog 3561; 3562; GFX11-LABEL: extractelement_sgpr_v16i8_vgpr_idx: 3563; GFX11: ; %bb.0: 3564; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0 3565; GFX11-NEXT: v_lshrrev_b32_e32 v1, 2, v0 3566; GFX11-NEXT: v_and_b32_e32 v0, 3, v0 3567; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 3568; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 3569; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0 3570; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3571; GFX11-NEXT: s_bfe_u32 s9, s0, 0x80008 3572; GFX11-NEXT: s_bfe_u32 s11, s1, 0x80008 3573; GFX11-NEXT: s_lshr_b32 s5, s1, 24 3574; GFX11-NEXT: s_and_b32 s8, s0, 0xff 3575; GFX11-NEXT: s_and_b32 s10, s1, 0xff 3576; GFX11-NEXT: s_bfe_u32 s1, s1, 0x80010 3577; GFX11-NEXT: s_lshl_b32 s9, s9, 8 3578; GFX11-NEXT: s_lshl_b32 s11, s11, 8 3579; GFX11-NEXT: s_lshl_b32 s1, s1, 16 3580; GFX11-NEXT: s_or_b32 s8, s8, s9 3581; GFX11-NEXT: s_or_b32 s9, s10, s11 3582; GFX11-NEXT: s_lshl_b32 s5, s5, 24 3583; GFX11-NEXT: s_or_b32 s1, s9, s1 3584; GFX11-NEXT: s_lshr_b32 s4, s0, 24 3585; GFX11-NEXT: s_bfe_u32 s0, s0, 0x80010 3586; GFX11-NEXT: s_or_b32 s1, s1, s5 3587; GFX11-NEXT: s_lshl_b32 s0, s0, 16 3588; GFX11-NEXT: s_bfe_u32 s13, s2, 0x80008 3589; GFX11-NEXT: v_mov_b32_e32 v2, s1 3590; GFX11-NEXT: s_lshl_b32 s4, s4, 24 3591; GFX11-NEXT: s_or_b32 s0, s8, s0 3592; GFX11-NEXT: s_lshr_b32 s6, s2, 24 3593; GFX11-NEXT: s_and_b32 s12, s2, 0xff 3594; GFX11-NEXT: s_bfe_u32 s2, s2, 0x80010 3595; GFX11-NEXT: s_lshl_b32 s13, s13, 8 3596; GFX11-NEXT: s_or_b32 s0, s0, s4 3597; GFX11-NEXT: s_lshl_b32 s2, s2, 16 3598; GFX11-NEXT: s_or_b32 s10, s12, s13 3599; GFX11-NEXT: s_bfe_u32 s5, s3, 0x80008 3600; GFX11-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo 3601; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 3602; GFX11-NEXT: s_or_b32 s2, s10, s2 3603; GFX11-NEXT: s_lshl_b32 s4, s6, 24 3604; GFX11-NEXT: s_and_b32 s6, s3, 0xff 3605; GFX11-NEXT: s_lshl_b32 s5, s5, 8 3606; GFX11-NEXT: s_bfe_u32 s1, s3, 0x80010 3607; GFX11-NEXT: s_or_b32 s2, s2, s4 3608; 
GFX11-NEXT: s_lshr_b32 s7, s3, 24 3609; GFX11-NEXT: s_or_b32 s3, s6, s5 3610; GFX11-NEXT: s_lshl_b32 s1, s1, 16 3611; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo 3612; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 3613; GFX11-NEXT: s_or_b32 s0, s3, s1 3614; GFX11-NEXT: s_lshl_b32 s1, s7, 24 3615; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 3616; GFX11-NEXT: s_or_b32 s3, s0, s1 3617; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo 3618; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3619; GFX11-NEXT: v_lshrrev_b32_e32 v0, v0, v1 3620; GFX11-NEXT: v_readfirstlane_b32 s0, v0 3621; GFX11-NEXT: ; return to shader part epilog 3622 %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr 3623 %element = extractelement <16 x i8> %vector, i32 %idx 3624 ret i8 %element 3625} 3626; Constant index 0. The checks below still expect the full 128-bit load and a repacking of the bytes of the first loaded dword (v0), but no index-dependent select and no trailing shift. The assertions are autogenerated, so regenerate them with utils/update_llc_test_checks.py instead of editing them by hand. 3627define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { 3628; GFX9-LABEL: extractelement_vgpr_v16i8_idx0: 3629; GFX9: ; %bb.0: 3630; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3631; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3632; GFX9-NEXT: s_waitcnt vmcnt(0) 3633; GFX9-NEXT: v_mov_b32_e32 v2, 8 3634; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 3635; GFX9-NEXT: v_mov_b32_e32 v3, 16 3636; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 3637; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3638; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3639; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 3640; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 3641; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 3642; GFX9-NEXT: s_setpc_b64 s[30:31] 3643; 3644; GFX8-LABEL: extractelement_vgpr_v16i8_idx0: 3645; GFX8: ; %bb.0: 3646; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3647; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3648; GFX8-NEXT: s_waitcnt vmcnt(0) 3649; GFX8-NEXT: v_mov_b32_e32 v1, 8 3650; GFX8-NEXT:
v_mov_b32_e32 v2, 16 3651; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3652; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3653; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3654; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3655; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 3656; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3657; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3658; GFX8-NEXT: s_setpc_b64 s[30:31] 3659; 3660; GFX7-LABEL: extractelement_vgpr_v16i8_idx0: 3661; GFX7: ; %bb.0: 3662; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3663; GFX7-NEXT: s_mov_b32 s6, 0 3664; GFX7-NEXT: s_mov_b32 s7, 0xf000 3665; GFX7-NEXT: s_mov_b64 s[4:5], 0 3666; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3667; GFX7-NEXT: s_waitcnt vmcnt(0) 3668; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 3669; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 3670; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 3671; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 3672; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3673; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3674; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 3675; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 3676; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 3677; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 3678; GFX7-NEXT: s_setpc_b64 s[30:31] 3679; 3680; GFX10-LABEL: extractelement_vgpr_v16i8_idx0: 3681; GFX10: ; %bb.0: 3682; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3683; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3684; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3685; GFX10-NEXT: s_waitcnt vmcnt(0) 3686; GFX10-NEXT: v_mov_b32_e32 v1, 8 3687; GFX10-NEXT: v_mov_b32_e32 v2, 16 3688; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3689; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3690; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:DWORD src1_sel:BYTE_2 3691; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 3692; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3693; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 3694; GFX10-NEXT: s_setpc_b64 s[30:31] 3695; 3696; GFX11-LABEL: extractelement_vgpr_v16i8_idx0: 3697; GFX11: ; %bb.0: 3698; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3699; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3700; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 3701; GFX11-NEXT: s_waitcnt vmcnt(0) 3702; GFX11-NEXT: v_bfe_u32 v1, v0, 8, 8 3703; GFX11-NEXT: v_bfe_u32 v2, v0, 16, 8 3704; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3705; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3706; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1 3707; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3708; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 3709; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1 3710; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3711; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3712; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 3713; GFX11-NEXT: s_setpc_b64 s[30:31] 3714 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3715 %element = extractelement <16 x i8> %vector, i32 0 3716 ret i8 %element 3717} 3718; Constant index 1. The checks below expect the full 128-bit load, a repacking of the bytes of the first loaded dword (v0), and a final v_lshrrev_b32 by 8 that moves bits 8 to 15 of the repacked dword into the low byte before returning. The assertions are autogenerated, so regenerate them with utils/update_llc_test_checks.py instead of editing them by hand. 3719define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { 3720; GFX9-LABEL: extractelement_vgpr_v16i8_idx1: 3721; GFX9: ; %bb.0: 3722; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3723; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3724; GFX9-NEXT: s_mov_b32 s4, 8 3725; GFX9-NEXT: s_waitcnt vmcnt(0) 3726; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 3727; GFX9-NEXT: v_mov_b32_e32 v2, 16 3728; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3729; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3730; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3731; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v4 3732;
GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3733; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 3734; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3735; GFX9-NEXT: s_setpc_b64 s[30:31] 3736; 3737; GFX8-LABEL: extractelement_vgpr_v16i8_idx1: 3738; GFX8: ; %bb.0: 3739; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3740; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3741; GFX8-NEXT: s_waitcnt vmcnt(0) 3742; GFX8-NEXT: v_mov_b32_e32 v1, 8 3743; GFX8-NEXT: v_mov_b32_e32 v2, 16 3744; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3745; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3746; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3747; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3748; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 3749; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3750; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3751; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3752; GFX8-NEXT: s_setpc_b64 s[30:31] 3753; 3754; GFX7-LABEL: extractelement_vgpr_v16i8_idx1: 3755; GFX7: ; %bb.0: 3756; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3757; GFX7-NEXT: s_mov_b32 s6, 0 3758; GFX7-NEXT: s_mov_b32 s7, 0xf000 3759; GFX7-NEXT: s_mov_b64 s[4:5], 0 3760; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3761; GFX7-NEXT: s_waitcnt vmcnt(0) 3762; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 3763; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 3764; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 3765; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 3766; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3767; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3768; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 3769; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 3770; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 3771; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 3772; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3773; GFX7-NEXT: s_setpc_b64 s[30:31] 3774; 3775; GFX10-LABEL: extractelement_vgpr_v16i8_idx1: 3776; GFX10: ; %bb.0: 
3777; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3778; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3779; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3780; GFX10-NEXT: s_mov_b32 s4, 8 3781; GFX10-NEXT: s_waitcnt vmcnt(0) 3782; GFX10-NEXT: v_mov_b32_e32 v1, 16 3783; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3784; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3785; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3786; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v2 3787; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 3788; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 3789; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3790; GFX10-NEXT: s_setpc_b64 s[30:31] 3791; 3792; GFX11-LABEL: extractelement_vgpr_v16i8_idx1: 3793; GFX11: ; %bb.0: 3794; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3795; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3796; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 3797; GFX11-NEXT: s_waitcnt vmcnt(0) 3798; GFX11-NEXT: v_bfe_u32 v1, v0, 8, 8 3799; GFX11-NEXT: v_bfe_u32 v2, v0, 16, 8 3800; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3801; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3802; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1 3803; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3804; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 3805; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1 3806; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3807; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3808; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 3809; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3810; GFX11-NEXT: s_setpc_b64 s[30:31] 3811 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3812 %element = extractelement <16 x i8> %vector, i32 1 3813 ret i8 %element 3814} 3815 3816define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { 3817; GFX9-LABEL: 
extractelement_vgpr_v16i8_idx2: 3818; GFX9: ; %bb.0: 3819; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3820; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3821; GFX9-NEXT: s_waitcnt vmcnt(0) 3822; GFX9-NEXT: v_mov_b32_e32 v2, 8 3823; GFX9-NEXT: s_mov_b32 s4, 16 3824; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 3825; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3826; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3827; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3828; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 3829; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3830; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 3831; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3832; GFX9-NEXT: s_setpc_b64 s[30:31] 3833; 3834; GFX8-LABEL: extractelement_vgpr_v16i8_idx2: 3835; GFX8: ; %bb.0: 3836; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3837; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3838; GFX8-NEXT: s_waitcnt vmcnt(0) 3839; GFX8-NEXT: v_mov_b32_e32 v1, 8 3840; GFX8-NEXT: v_mov_b32_e32 v2, 16 3841; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3842; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3843; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3844; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3845; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 3846; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3847; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3848; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3849; GFX8-NEXT: s_setpc_b64 s[30:31] 3850; 3851; GFX7-LABEL: extractelement_vgpr_v16i8_idx2: 3852; GFX7: ; %bb.0: 3853; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3854; GFX7-NEXT: s_mov_b32 s6, 0 3855; GFX7-NEXT: s_mov_b32 s7, 0xf000 3856; GFX7-NEXT: s_mov_b64 s[4:5], 0 3857; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 
addr64 3858; GFX7-NEXT: s_waitcnt vmcnt(0) 3859; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 3860; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 3861; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 3862; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 3863; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3864; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3865; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 3866; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 3867; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 3868; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 3869; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3870; GFX7-NEXT: s_setpc_b64 s[30:31] 3871; 3872; GFX10-LABEL: extractelement_vgpr_v16i8_idx2: 3873; GFX10: ; %bb.0: 3874; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3875; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3876; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3877; GFX10-NEXT: s_waitcnt vmcnt(0) 3878; GFX10-NEXT: v_mov_b32_e32 v1, 8 3879; GFX10-NEXT: s_mov_b32 s4, 16 3880; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3881; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 3882; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3883; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 3884; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 3885; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 3886; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3887; GFX10-NEXT: s_setpc_b64 s[30:31] 3888; 3889; GFX11-LABEL: extractelement_vgpr_v16i8_idx2: 3890; GFX11: ; %bb.0: 3891; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3892; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3893; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 3894; GFX11-NEXT: s_waitcnt vmcnt(0) 3895; GFX11-NEXT: v_bfe_u32 v1, v0, 8, 8 3896; GFX11-NEXT: v_bfe_u32 v2, v0, 16, 8 3897; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3898; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3899; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1 3900; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3901; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 3902; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1 3903; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3904; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3905; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 3906; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3907; GFX11-NEXT: s_setpc_b64 s[30:31] 3908 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3909 %element = extractelement <16 x i8> %vector, i32 2 3910 ret i8 %element 3911} 3912; Constant index 3 of a <16 x i8> loaded through an addrspace(1) pointer. The autogenerated checks that follow expect a 128-bit load, the first result dword (v0) reassembled from its four bytes, and a final right shift by 24. 3913define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { 3914; GFX9-LABEL: extractelement_vgpr_v16i8_idx3: 3915; GFX9: ; %bb.0: 3916; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3917; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3918; GFX9-NEXT: s_waitcnt vmcnt(0) 3919; GFX9-NEXT: v_mov_b32_e32 v2, 8 3920; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 3921; GFX9-NEXT: v_mov_b32_e32 v3, 16 3922; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 3923; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3924; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3925; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 3926; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 3927; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 3928; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3929; GFX9-NEXT: s_setpc_b64 s[30:31] 3930; 3931; GFX8-LABEL: extractelement_vgpr_v16i8_idx3: 3932; GFX8: ; %bb.0: 3933; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3934; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3935; GFX8-NEXT: s_waitcnt vmcnt(0) 3936; GFX8-NEXT: v_mov_b32_e32 v1, 8 3937; GFX8-NEXT: v_mov_b32_e32 v2, 16 3938; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3939; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3940; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:DWORD src1_sel:BYTE_2 3941; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3942; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 3943; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3944; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3945; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3946; GFX8-NEXT: s_setpc_b64 s[30:31] 3947; 3948; GFX7-LABEL: extractelement_vgpr_v16i8_idx3: 3949; GFX7: ; %bb.0: 3950; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3951; GFX7-NEXT: s_mov_b32 s6, 0 3952; GFX7-NEXT: s_mov_b32 s7, 0xf000 3953; GFX7-NEXT: s_mov_b64 s[4:5], 0 3954; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3955; GFX7-NEXT: s_waitcnt vmcnt(0) 3956; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 3957; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 3958; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 3959; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 3960; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3961; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3962; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 3963; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 3964; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 3965; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 3966; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3967; GFX7-NEXT: s_setpc_b64 s[30:31] 3968; 3969; GFX10-LABEL: extractelement_vgpr_v16i8_idx3: 3970; GFX10: ; %bb.0: 3971; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3972; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3973; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3974; GFX10-NEXT: s_waitcnt vmcnt(0) 3975; GFX10-NEXT: v_mov_b32_e32 v1, 8 3976; GFX10-NEXT: v_mov_b32_e32 v2, 16 3977; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3978; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3979; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3980; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 3981; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3982; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 3983; 
GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3984; GFX10-NEXT: s_setpc_b64 s[30:31] 3985; 3986; GFX11-LABEL: extractelement_vgpr_v16i8_idx3: 3987; GFX11: ; %bb.0: 3988; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3989; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3990; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 3991; GFX11-NEXT: s_waitcnt vmcnt(0) 3992; GFX11-NEXT: v_bfe_u32 v1, v0, 8, 8 3993; GFX11-NEXT: v_bfe_u32 v2, v0, 16, 8 3994; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3995; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3996; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1 3997; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3998; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 3999; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1 4000; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4001; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4002; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 4003; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 4004; GFX11-NEXT: s_setpc_b64 s[30:31] 4005 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 4006 %element = extractelement <16 x i8> %vector, i32 3 4007 ret i8 %element 4008} 4009; Constant index 4 of a <16 x i8> loaded through an addrspace(1) pointer. The autogenerated checks that follow expect a 128-bit load and the second result dword (v1) reassembled from its four bytes, with no trailing shift. 4010define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { 4011; GFX9-LABEL: extractelement_vgpr_v16i8_idx4: 4012; GFX9: ; %bb.0: 4013; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4014; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4015; GFX9-NEXT: s_waitcnt vmcnt(0) 4016; GFX9-NEXT: v_mov_b32_e32 v2, 8 4017; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 4018; GFX9-NEXT: v_mov_b32_e32 v3, 16 4019; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v1 4020; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4021; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4022; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 4023; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 4024; 
GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 4025; GFX9-NEXT: s_setpc_b64 s[30:31] 4026; 4027; GFX8-LABEL: extractelement_vgpr_v16i8_idx4: 4028; GFX8: ; %bb.0: 4029; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4030; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4031; GFX8-NEXT: s_waitcnt vmcnt(0) 4032; GFX8-NEXT: v_mov_b32_e32 v0, 8 4033; GFX8-NEXT: v_mov_b32_e32 v2, 16 4034; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4035; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4036; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4037; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4038; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4039; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4040; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4041; GFX8-NEXT: s_setpc_b64 s[30:31] 4042; 4043; GFX7-LABEL: extractelement_vgpr_v16i8_idx4: 4044; GFX7: ; %bb.0: 4045; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4046; GFX7-NEXT: s_mov_b32 s6, 0 4047; GFX7-NEXT: s_mov_b32 s7, 0xf000 4048; GFX7-NEXT: s_mov_b64 s[4:5], 0 4049; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 4050; GFX7-NEXT: s_waitcnt vmcnt(0) 4051; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 4052; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 4053; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 4054; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 4055; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 4056; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4057; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 4058; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 4059; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 4060; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 4061; GFX7-NEXT: s_setpc_b64 s[30:31] 4062; 4063; GFX10-LABEL: extractelement_vgpr_v16i8_idx4: 4064; GFX10: ; %bb.0: 4065; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4066; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4067; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4068; GFX10-NEXT: 
s_waitcnt vmcnt(0) 4069; GFX10-NEXT: v_mov_b32_e32 v0, 8 4070; GFX10-NEXT: v_mov_b32_e32 v2, 16 4071; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4072; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4073; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4074; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 4075; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4076; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 4077; GFX10-NEXT: s_setpc_b64 s[30:31] 4078; 4079; GFX11-LABEL: extractelement_vgpr_v16i8_idx4: 4080; GFX11: ; %bb.0: 4081; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4082; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4083; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 4084; GFX11-NEXT: s_waitcnt vmcnt(0) 4085; GFX11-NEXT: v_bfe_u32 v0, v1, 8, 8 4086; GFX11-NEXT: v_bfe_u32 v2, v1, 16, 8 4087; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4088; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4089; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 4090; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 4091; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 4092; GFX11-NEXT: v_and_or_b32 v0, 0xff, v1, v0 4093; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4094; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4095; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 4096; GFX11-NEXT: s_setpc_b64 s[30:31] 4097 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 4098 %element = extractelement <16 x i8> %vector, i32 4 4099 ret i8 %element 4100} 4101; Constant index 5 of a <16 x i8> loaded through an addrspace(1) pointer. The autogenerated checks that follow expect a 128-bit load, the second result dword (v1) reassembled from its four bytes, and a final right shift by 8. 4102define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { 4103; GFX9-LABEL: extractelement_vgpr_v16i8_idx5: 4104; GFX9: ; %bb.0: 4105; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4106; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4107; GFX9-NEXT: s_mov_b32 s4, 8 4108; GFX9-NEXT: s_waitcnt vmcnt(0) 4109; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 4110; GFX9-NEXT: 
v_mov_b32_e32 v2, 16 4111; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4112; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4113; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4114; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v4 4115; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4116; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 4117; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4118; GFX9-NEXT: s_setpc_b64 s[30:31] 4119; 4120; GFX8-LABEL: extractelement_vgpr_v16i8_idx5: 4121; GFX8: ; %bb.0: 4122; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4123; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4124; GFX8-NEXT: s_waitcnt vmcnt(0) 4125; GFX8-NEXT: v_mov_b32_e32 v0, 8 4126; GFX8-NEXT: v_mov_b32_e32 v2, 16 4127; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4128; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4129; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4130; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4131; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4132; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4133; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4134; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4135; GFX8-NEXT: s_setpc_b64 s[30:31] 4136; 4137; GFX7-LABEL: extractelement_vgpr_v16i8_idx5: 4138; GFX7: ; %bb.0: 4139; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4140; GFX7-NEXT: s_mov_b32 s6, 0 4141; GFX7-NEXT: s_mov_b32 s7, 0xf000 4142; GFX7-NEXT: s_mov_b64 s[4:5], 0 4143; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 4144; GFX7-NEXT: s_waitcnt vmcnt(0) 4145; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 4146; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 4147; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 4148; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 4149; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 4150; GFX7-NEXT: v_lshlrev_b32_e32 
v1, 16, v1 4151; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 4152; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 4153; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 4154; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 4155; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4156; GFX7-NEXT: s_setpc_b64 s[30:31] 4157; 4158; GFX10-LABEL: extractelement_vgpr_v16i8_idx5: 4159; GFX10: ; %bb.0: 4160; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4161; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4162; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4163; GFX10-NEXT: s_mov_b32 s4, 8 4164; GFX10-NEXT: s_waitcnt vmcnt(0) 4165; GFX10-NEXT: v_mov_b32_e32 v0, 16 4166; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4167; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4168; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4169; GFX10-NEXT: v_and_or_b32 v1, 0xff, v1, v2 4170; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 4171; GFX10-NEXT: v_or3_b32 v0, v1, v0, v2 4172; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4173; GFX10-NEXT: s_setpc_b64 s[30:31] 4174; 4175; GFX11-LABEL: extractelement_vgpr_v16i8_idx5: 4176; GFX11: ; %bb.0: 4177; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4178; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4179; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 4180; GFX11-NEXT: s_waitcnt vmcnt(0) 4181; GFX11-NEXT: v_bfe_u32 v0, v1, 8, 8 4182; GFX11-NEXT: v_bfe_u32 v2, v1, 16, 8 4183; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4184; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4185; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 4186; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 4187; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 4188; GFX11-NEXT: v_and_or_b32 v0, 0xff, v1, v0 4189; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4190; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4191; GFX11-NEXT: v_or3_b32 
v0, v0, v2, v1 4192; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4193; GFX11-NEXT: s_setpc_b64 s[30:31] 4194 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 4195 %element = extractelement <16 x i8> %vector, i32 5 4196 ret i8 %element 4197} 4198; Constant index 6 of a <16 x i8> loaded through an addrspace(1) pointer. The autogenerated checks that follow expect a 128-bit load, the second result dword (v1) reassembled from its four bytes, and a final right shift by 16. 4199define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { 4200; GFX9-LABEL: extractelement_vgpr_v16i8_idx6: 4201; GFX9: ; %bb.0: 4202; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4203; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4204; GFX9-NEXT: s_waitcnt vmcnt(0) 4205; GFX9-NEXT: v_mov_b32_e32 v2, 8 4206; GFX9-NEXT: s_mov_b32 s4, 16 4207; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 4208; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4209; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4210; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4211; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 4212; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4213; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 4214; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4215; GFX9-NEXT: s_setpc_b64 s[30:31] 4216; 4217; GFX8-LABEL: extractelement_vgpr_v16i8_idx6: 4218; GFX8: ; %bb.0: 4219; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4220; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4221; GFX8-NEXT: s_waitcnt vmcnt(0) 4222; GFX8-NEXT: v_mov_b32_e32 v0, 8 4223; GFX8-NEXT: v_mov_b32_e32 v2, 16 4224; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4225; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4226; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4227; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4228; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4229; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4230; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4231; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, 
v0 4232; GFX8-NEXT: s_setpc_b64 s[30:31] 4233; 4234; GFX7-LABEL: extractelement_vgpr_v16i8_idx6: 4235; GFX7: ; %bb.0: 4236; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4237; GFX7-NEXT: s_mov_b32 s6, 0 4238; GFX7-NEXT: s_mov_b32 s7, 0xf000 4239; GFX7-NEXT: s_mov_b64 s[4:5], 0 4240; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 4241; GFX7-NEXT: s_waitcnt vmcnt(0) 4242; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 4243; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 4244; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 4245; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 4246; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 4247; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4248; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 4249; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 4250; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 4251; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 4252; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4253; GFX7-NEXT: s_setpc_b64 s[30:31] 4254; 4255; GFX10-LABEL: extractelement_vgpr_v16i8_idx6: 4256; GFX10: ; %bb.0: 4257; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4258; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4259; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4260; GFX10-NEXT: s_waitcnt vmcnt(0) 4261; GFX10-NEXT: v_mov_b32_e32 v0, 8 4262; GFX10-NEXT: s_mov_b32 s4, 16 4263; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4264; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v1 4265; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4266; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 4267; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 4268; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 4269; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4270; GFX10-NEXT: s_setpc_b64 s[30:31] 4271; 4272; GFX11-LABEL: extractelement_vgpr_v16i8_idx6: 4273; GFX11: ; %bb.0: 4274; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4275; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4276; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 4277; 
GFX11-NEXT: s_waitcnt vmcnt(0) 4278; GFX11-NEXT: v_bfe_u32 v0, v1, 8, 8 4279; GFX11-NEXT: v_bfe_u32 v2, v1, 16, 8 4280; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4281; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4282; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 4283; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 4284; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 4285; GFX11-NEXT: v_and_or_b32 v0, 0xff, v1, v0 4286; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4287; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4288; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 4289; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4290; GFX11-NEXT: s_setpc_b64 s[30:31] 4291 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 4292 %element = extractelement <16 x i8> %vector, i32 6 4293 ret i8 %element 4294} 4295; Constant index 7 of a <16 x i8> loaded through an addrspace(1) pointer. The autogenerated checks that follow expect a 128-bit load, the second result dword (v1) reassembled from its four bytes, and a final right shift by 24. 4296define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { 4297; GFX9-LABEL: extractelement_vgpr_v16i8_idx7: 4298; GFX9: ; %bb.0: 4299; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4300; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4301; GFX9-NEXT: s_waitcnt vmcnt(0) 4302; GFX9-NEXT: v_mov_b32_e32 v2, 8 4303; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 4304; GFX9-NEXT: v_mov_b32_e32 v3, 16 4305; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v1 4306; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4307; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4308; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 4309; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 4310; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 4311; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 4312; GFX9-NEXT: s_setpc_b64 s[30:31] 4313; 4314; GFX8-LABEL: extractelement_vgpr_v16i8_idx7: 4315; GFX8: ; %bb.0: 4316; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4317; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4318; GFX8-NEXT: 
s_waitcnt vmcnt(0) 4319; GFX8-NEXT: v_mov_b32_e32 v0, 8 4320; GFX8-NEXT: v_mov_b32_e32 v2, 16 4321; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4322; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4323; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4324; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4325; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4326; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4327; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4328; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 4329; GFX8-NEXT: s_setpc_b64 s[30:31] 4330; 4331; GFX7-LABEL: extractelement_vgpr_v16i8_idx7: 4332; GFX7: ; %bb.0: 4333; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4334; GFX7-NEXT: s_mov_b32 s6, 0 4335; GFX7-NEXT: s_mov_b32 s7, 0xf000 4336; GFX7-NEXT: s_mov_b64 s[4:5], 0 4337; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 4338; GFX7-NEXT: s_waitcnt vmcnt(0) 4339; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 4340; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 4341; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 4342; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 4343; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 4344; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4345; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 4346; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 4347; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 4348; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 4349; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 4350; GFX7-NEXT: s_setpc_b64 s[30:31] 4351; 4352; GFX10-LABEL: extractelement_vgpr_v16i8_idx7: 4353; GFX10: ; %bb.0: 4354; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4355; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4356; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4357; GFX10-NEXT: s_waitcnt vmcnt(0) 4358; GFX10-NEXT: v_mov_b32_e32 v0, 8 4359; GFX10-NEXT: v_mov_b32_e32 v2, 16 4360; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:DWORD src1_sel:BYTE_1 4361; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4362; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4363; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 4364; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4365; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 4366; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 4367; GFX10-NEXT: s_setpc_b64 s[30:31] 4368; 4369; GFX11-LABEL: extractelement_vgpr_v16i8_idx7: 4370; GFX11: ; %bb.0: 4371; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4372; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4373; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 4374; GFX11-NEXT: s_waitcnt vmcnt(0) 4375; GFX11-NEXT: v_bfe_u32 v0, v1, 8, 8 4376; GFX11-NEXT: v_bfe_u32 v2, v1, 16, 8 4377; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v1 4378; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4379; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 4380; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 4381; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 4382; GFX11-NEXT: v_and_or_b32 v0, 0xff, v1, v0 4383; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4384; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4385; GFX11-NEXT: v_or3_b32 v0, v0, v2, v1 4386; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 4387; GFX11-NEXT: s_setpc_b64 s[30:31] 4388 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 4389 %element = extractelement <16 x i8> %vector, i32 7 4390 ret i8 %element 4391} 4392; Constant index 8 of a <16 x i8> loaded through an addrspace(1) pointer. The autogenerated checks that follow expect a 128-bit load and the third result dword (v2) reassembled from its four bytes, with no trailing shift. 4393define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { 4394; GFX9-LABEL: extractelement_vgpr_v16i8_idx8: 4395; GFX9: ; %bb.0: 4396; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4397; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4398; GFX9-NEXT: s_waitcnt vmcnt(0) 4399; GFX9-NEXT: v_mov_b32_e32 v1, 8 4400; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 4401; GFX9-NEXT: v_mov_b32_e32 v3, 16 4402; GFX9-NEXT: 
v_lshrrev_b32_e32 v4, 24, v2 4403; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4404; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4405; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v1 4406; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 4407; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 4408; GFX9-NEXT: s_setpc_b64 s[30:31] 4409; 4410; GFX8-LABEL: extractelement_vgpr_v16i8_idx8: 4411; GFX8: ; %bb.0: 4412; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4413; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4414; GFX8-NEXT: s_waitcnt vmcnt(0) 4415; GFX8-NEXT: v_mov_b32_e32 v0, 8 4416; GFX8-NEXT: v_mov_b32_e32 v1, 16 4417; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4418; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4419; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4420; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4421; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4422; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4423; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4424; GFX8-NEXT: s_setpc_b64 s[30:31] 4425; 4426; GFX7-LABEL: extractelement_vgpr_v16i8_idx8: 4427; GFX7: ; %bb.0: 4428; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4429; GFX7-NEXT: s_mov_b32 s6, 0 4430; GFX7-NEXT: s_mov_b32 s7, 0xf000 4431; GFX7-NEXT: s_mov_b64 s[4:5], 0 4432; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 4433; GFX7-NEXT: s_waitcnt vmcnt(0) 4434; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 4435; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 4436; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v2 4437; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 4438; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 4439; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 4440; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 4441; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 4442; GFX7-NEXT: v_lshlrev_b32_e32 
v0, 24, v0 4443; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 4444; GFX7-NEXT: s_setpc_b64 s[30:31] 4445; 4446; GFX10-LABEL: extractelement_vgpr_v16i8_idx8: 4447; GFX10: ; %bb.0: 4448; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4449; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4450; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4451; GFX10-NEXT: s_waitcnt vmcnt(0) 4452; GFX10-NEXT: v_mov_b32_e32 v0, 8 4453; GFX10-NEXT: v_mov_b32_e32 v1, 16 4454; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4455; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4456; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4457; GFX10-NEXT: v_and_or_b32 v0, 0xff, v2, v0 4458; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 4459; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 4460; GFX10-NEXT: s_setpc_b64 s[30:31] 4461; 4462; GFX11-LABEL: extractelement_vgpr_v16i8_idx8: 4463; GFX11: ; %bb.0: 4464; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4465; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4466; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 4467; GFX11-NEXT: s_waitcnt vmcnt(0) 4468; GFX11-NEXT: v_bfe_u32 v0, v2, 8, 8 4469; GFX11-NEXT: v_bfe_u32 v1, v2, 16, 8 4470; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4471; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4472; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 4473; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4474; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 4475; GFX11-NEXT: v_and_or_b32 v0, 0xff, v2, v0 4476; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v3 4477; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4478; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2 4479; GFX11-NEXT: s_setpc_b64 s[30:31] 4480 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 4481 %element = extractelement <16 x i8> %vector, i32 8 4482 ret i8 %element 4483} 4484 4485define i8 @extractelement_vgpr_v16i8_idx9(<16 x 
i8> addrspace(1)* %ptr) { 4486; GFX9-LABEL: extractelement_vgpr_v16i8_idx9: 4487; GFX9: ; %bb.0: 4488; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4489; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4490; GFX9-NEXT: s_mov_b32 s4, 8 4491; GFX9-NEXT: s_waitcnt vmcnt(0) 4492; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 4493; GFX9-NEXT: v_mov_b32_e32 v1, 16 4494; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4495; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4496; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4497; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v4 4498; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v3 4499; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 4500; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4501; GFX9-NEXT: s_setpc_b64 s[30:31] 4502; 4503; GFX8-LABEL: extractelement_vgpr_v16i8_idx9: 4504; GFX8: ; %bb.0: 4505; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4506; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4507; GFX8-NEXT: s_waitcnt vmcnt(0) 4508; GFX8-NEXT: v_mov_b32_e32 v0, 8 4509; GFX8-NEXT: v_mov_b32_e32 v1, 16 4510; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4511; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4512; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4513; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4514; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4515; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4516; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4517; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4518; GFX8-NEXT: s_setpc_b64 s[30:31] 4519; 4520; GFX7-LABEL: extractelement_vgpr_v16i8_idx9: 4521; GFX7: ; %bb.0: 4522; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4523; GFX7-NEXT: s_mov_b32 s6, 0 4524; GFX7-NEXT: s_mov_b32 s7, 0xf000 4525; GFX7-NEXT: s_mov_b64 s[4:5], 0 4526; GFX7-NEXT: 
buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 4527; GFX7-NEXT: s_waitcnt vmcnt(0) 4528; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 4529; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 4530; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v2 4531; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 4532; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 4533; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 4534; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 4535; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 4536; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 4537; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 4538; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4539; GFX7-NEXT: s_setpc_b64 s[30:31] 4540; 4541; GFX10-LABEL: extractelement_vgpr_v16i8_idx9: 4542; GFX10: ; %bb.0: 4543; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4544; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4545; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4546; GFX10-NEXT: s_mov_b32 s4, 8 4547; GFX10-NEXT: s_waitcnt vmcnt(0) 4548; GFX10-NEXT: v_mov_b32_e32 v0, 16 4549; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4550; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4551; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4552; GFX10-NEXT: v_and_or_b32 v1, 0xff, v2, v1 4553; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 4554; GFX10-NEXT: v_or3_b32 v0, v1, v0, v2 4555; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4556; GFX10-NEXT: s_setpc_b64 s[30:31] 4557; 4558; GFX11-LABEL: extractelement_vgpr_v16i8_idx9: 4559; GFX11: ; %bb.0: 4560; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4561; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4562; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 4563; GFX11-NEXT: s_waitcnt vmcnt(0) 4564; GFX11-NEXT: v_bfe_u32 v0, v2, 8, 8 4565; GFX11-NEXT: v_bfe_u32 v1, v2, 16, 8 4566; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4567; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4568; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 4569; 
GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4570; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 4571; GFX11-NEXT: v_and_or_b32 v0, 0xff, v2, v0 4572; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v3 4573; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4574; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2 4575; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4576; GFX11-NEXT: s_setpc_b64 s[30:31] 4577 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 4578 %element = extractelement <16 x i8> %vector, i32 9 4579 ret i8 %element 4580} 4581 4582define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { 4583; GFX9-LABEL: extractelement_vgpr_v16i8_idx10: 4584; GFX9: ; %bb.0: 4585; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4586; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4587; GFX9-NEXT: s_waitcnt vmcnt(0) 4588; GFX9-NEXT: v_mov_b32_e32 v1, 8 4589; GFX9-NEXT: s_mov_b32 s4, 16 4590; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 4591; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4592; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4593; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4594; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v1 4595; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4596; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 4597; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4598; GFX9-NEXT: s_setpc_b64 s[30:31] 4599; 4600; GFX8-LABEL: extractelement_vgpr_v16i8_idx10: 4601; GFX8: ; %bb.0: 4602; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4603; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4604; GFX8-NEXT: s_waitcnt vmcnt(0) 4605; GFX8-NEXT: v_mov_b32_e32 v0, 8 4606; GFX8-NEXT: v_mov_b32_e32 v1, 16 4607; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4608; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4609; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4610; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4611; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4612; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4613; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4614; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4615; GFX8-NEXT: s_setpc_b64 s[30:31] 4616; 4617; GFX7-LABEL: extractelement_vgpr_v16i8_idx10: 4618; GFX7: ; %bb.0: 4619; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4620; GFX7-NEXT: s_mov_b32 s6, 0 4621; GFX7-NEXT: s_mov_b32 s7, 0xf000 4622; GFX7-NEXT: s_mov_b64 s[4:5], 0 4623; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 4624; GFX7-NEXT: s_waitcnt vmcnt(0) 4625; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 4626; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 4627; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v2 4628; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 4629; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 4630; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 4631; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 4632; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 4633; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 4634; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 4635; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4636; GFX7-NEXT: s_setpc_b64 s[30:31] 4637; 4638; GFX10-LABEL: extractelement_vgpr_v16i8_idx10: 4639; GFX10: ; %bb.0: 4640; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4641; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4642; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4643; GFX10-NEXT: s_waitcnt vmcnt(0) 4644; GFX10-NEXT: v_mov_b32_e32 v0, 8 4645; GFX10-NEXT: s_mov_b32 s4, 16 4646; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4647; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v2 4648; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4649; GFX10-NEXT: v_and_or_b32 v0, 0xff, v2, v0 4650; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 4651; GFX10-NEXT: 
v_or3_b32 v0, v0, v3, v1 4652; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4653; GFX10-NEXT: s_setpc_b64 s[30:31] 4654; 4655; GFX11-LABEL: extractelement_vgpr_v16i8_idx10: 4656; GFX11: ; %bb.0: 4657; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4658; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4659; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 4660; GFX11-NEXT: s_waitcnt vmcnt(0) 4661; GFX11-NEXT: v_bfe_u32 v0, v2, 8, 8 4662; GFX11-NEXT: v_bfe_u32 v1, v2, 16, 8 4663; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4664; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4665; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 4666; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4667; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 4668; GFX11-NEXT: v_and_or_b32 v0, 0xff, v2, v0 4669; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v3 4670; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4671; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2 4672; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4673; GFX11-NEXT: s_setpc_b64 s[30:31] 4674 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 4675 %element = extractelement <16 x i8> %vector, i32 10 4676 ret i8 %element 4677} 4678 4679define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { 4680; GFX9-LABEL: extractelement_vgpr_v16i8_idx11: 4681; GFX9: ; %bb.0: 4682; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4683; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4684; GFX9-NEXT: s_waitcnt vmcnt(0) 4685; GFX9-NEXT: v_mov_b32_e32 v1, 8 4686; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 4687; GFX9-NEXT: v_mov_b32_e32 v3, 16 4688; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v2 4689; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4690; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4691; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v1 4692; GFX9-NEXT: 
v_lshlrev_b32_e32 v1, 24, v4 4693; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 4694; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 4695; GFX9-NEXT: s_setpc_b64 s[30:31] 4696; 4697; GFX8-LABEL: extractelement_vgpr_v16i8_idx11: 4698; GFX8: ; %bb.0: 4699; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4700; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4701; GFX8-NEXT: s_waitcnt vmcnt(0) 4702; GFX8-NEXT: v_mov_b32_e32 v0, 8 4703; GFX8-NEXT: v_mov_b32_e32 v1, 16 4704; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4705; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4706; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4707; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4708; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4709; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 4710; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4711; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 4712; GFX8-NEXT: s_setpc_b64 s[30:31] 4713; 4714; GFX7-LABEL: extractelement_vgpr_v16i8_idx11: 4715; GFX7: ; %bb.0: 4716; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4717; GFX7-NEXT: s_mov_b32 s6, 0 4718; GFX7-NEXT: s_mov_b32 s7, 0xf000 4719; GFX7-NEXT: s_mov_b64 s[4:5], 0 4720; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 4721; GFX7-NEXT: s_waitcnt vmcnt(0) 4722; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 4723; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 4724; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v2 4725; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 4726; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 4727; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 4728; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 4729; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 4730; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 4731; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 4732; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 4733; GFX7-NEXT: s_setpc_b64 s[30:31] 4734; 4735; GFX10-LABEL: extractelement_vgpr_v16i8_idx11: 4736; GFX10: ; %bb.0: 4737; 
GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4738; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4739; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4740; GFX10-NEXT: s_waitcnt vmcnt(0) 4741; GFX10-NEXT: v_mov_b32_e32 v0, 8 4742; GFX10-NEXT: v_mov_b32_e32 v1, 16 4743; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4744; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4745; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4746; GFX10-NEXT: v_and_or_b32 v0, 0xff, v2, v0 4747; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 4748; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 4749; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 4750; GFX10-NEXT: s_setpc_b64 s[30:31] 4751; 4752; GFX11-LABEL: extractelement_vgpr_v16i8_idx11: 4753; GFX11: ; %bb.0: 4754; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4755; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4756; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 4757; GFX11-NEXT: s_waitcnt vmcnt(0) 4758; GFX11-NEXT: v_bfe_u32 v0, v2, 8, 8 4759; GFX11-NEXT: v_bfe_u32 v1, v2, 16, 8 4760; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v2 4761; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4762; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 4763; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4764; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 4765; GFX11-NEXT: v_and_or_b32 v0, 0xff, v2, v0 4766; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v3 4767; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4768; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2 4769; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 4770; GFX11-NEXT: s_setpc_b64 s[30:31] 4771 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 4772 %element = extractelement <16 x i8> %vector, i32 11 4773 ret i8 %element 4774} 4775 4776define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { 4777; 
GFX9-LABEL: extractelement_vgpr_v16i8_idx12: 4778; GFX9: ; %bb.0: 4779; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4780; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4781; GFX9-NEXT: s_waitcnt vmcnt(0) 4782; GFX9-NEXT: v_mov_b32_e32 v1, 8 4783; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 4784; GFX9-NEXT: v_mov_b32_e32 v2, 16 4785; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v3 4786; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4787; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4788; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v1 4789; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 4790; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 4791; GFX9-NEXT: s_setpc_b64 s[30:31] 4792; 4793; GFX8-LABEL: extractelement_vgpr_v16i8_idx12: 4794; GFX8: ; %bb.0: 4795; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4796; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4797; GFX8-NEXT: s_waitcnt vmcnt(0) 4798; GFX8-NEXT: v_mov_b32_e32 v0, 8 4799; GFX8-NEXT: v_mov_b32_e32 v1, 16 4800; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4801; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 4802; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4803; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4804; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4805; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 4806; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4807; GFX8-NEXT: s_setpc_b64 s[30:31] 4808; 4809; GFX7-LABEL: extractelement_vgpr_v16i8_idx12: 4810; GFX7: ; %bb.0: 4811; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4812; GFX7-NEXT: s_mov_b32 s6, 0 4813; GFX7-NEXT: s_mov_b32 s7, 0xf000 4814; GFX7-NEXT: s_mov_b64 s[4:5], 0 4815; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 4816; GFX7-NEXT: s_waitcnt vmcnt(0) 4817; GFX7-NEXT: v_bfe_u32 
v2, v3, 8, 8 4818; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 4819; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 4820; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 4821; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 4822; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 4823; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 4824; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 4825; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 4826; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 4827; GFX7-NEXT: s_setpc_b64 s[30:31] 4828; 4829; GFX10-LABEL: extractelement_vgpr_v16i8_idx12: 4830; GFX10: ; %bb.0: 4831; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4832; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4833; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4834; GFX10-NEXT: s_waitcnt vmcnt(0) 4835; GFX10-NEXT: v_mov_b32_e32 v0, 8 4836; GFX10-NEXT: v_mov_b32_e32 v1, 16 4837; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4838; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v3 4839; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4840; GFX10-NEXT: v_and_or_b32 v0, 0xff, v3, v0 4841; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 4842; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 4843; GFX10-NEXT: s_setpc_b64 s[30:31] 4844; 4845; GFX11-LABEL: extractelement_vgpr_v16i8_idx12: 4846; GFX11: ; %bb.0: 4847; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4848; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4849; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 4850; GFX11-NEXT: s_waitcnt vmcnt(0) 4851; GFX11-NEXT: v_bfe_u32 v0, v3, 8, 8 4852; GFX11-NEXT: v_bfe_u32 v1, v3, 16, 8 4853; GFX11-NEXT: v_lshrrev_b32_e32 v2, 24, v3 4854; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4855; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 4856; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4857; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4858; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v2 4859; GFX11-NEXT: 
v_and_or_b32 v0, 0xff, v3, v0 4860; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4861; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2 4862; GFX11-NEXT: s_setpc_b64 s[30:31] 4863 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 4864 %element = extractelement <16 x i8> %vector, i32 12 4865 ret i8 %element 4866} 4867 4868define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) { 4869; GFX9-LABEL: extractelement_vgpr_v16i8_idx13: 4870; GFX9: ; %bb.0: 4871; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4872; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4873; GFX9-NEXT: s_mov_b32 s4, 8 4874; GFX9-NEXT: s_waitcnt vmcnt(0) 4875; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 4876; GFX9-NEXT: v_mov_b32_e32 v1, 16 4877; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v3 4878; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4879; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4880; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v4 4881; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 4882; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 4883; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4884; GFX9-NEXT: s_setpc_b64 s[30:31] 4885; 4886; GFX8-LABEL: extractelement_vgpr_v16i8_idx13: 4887; GFX8: ; %bb.0: 4888; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4889; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4890; GFX8-NEXT: s_waitcnt vmcnt(0) 4891; GFX8-NEXT: v_mov_b32_e32 v0, 8 4892; GFX8-NEXT: v_mov_b32_e32 v1, 16 4893; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4894; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 4895; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4896; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4897; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4898; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 4899; GFX8-NEXT: 
v_or_b32_e32 v0, v0, v1 4900; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4901; GFX8-NEXT: s_setpc_b64 s[30:31] 4902; 4903; GFX7-LABEL: extractelement_vgpr_v16i8_idx13: 4904; GFX7: ; %bb.0: 4905; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4906; GFX7-NEXT: s_mov_b32 s6, 0 4907; GFX7-NEXT: s_mov_b32 s7, 0xf000 4908; GFX7-NEXT: s_mov_b64 s[4:5], 0 4909; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 4910; GFX7-NEXT: s_waitcnt vmcnt(0) 4911; GFX7-NEXT: v_bfe_u32 v2, v3, 8, 8 4912; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 4913; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 4914; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 4915; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 4916; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 4917; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 4918; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 4919; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 4920; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 4921; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4922; GFX7-NEXT: s_setpc_b64 s[30:31] 4923; 4924; GFX10-LABEL: extractelement_vgpr_v16i8_idx13: 4925; GFX10: ; %bb.0: 4926; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4927; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4928; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4929; GFX10-NEXT: s_mov_b32 s4, 8 4930; GFX10-NEXT: s_waitcnt vmcnt(0) 4931; GFX10-NEXT: v_mov_b32_e32 v0, 16 4932; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4933; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v3 4934; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4935; GFX10-NEXT: v_and_or_b32 v1, 0xff, v3, v1 4936; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 4937; GFX10-NEXT: v_or3_b32 v0, v1, v0, v2 4938; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4939; GFX10-NEXT: s_setpc_b64 s[30:31] 4940; 4941; GFX11-LABEL: extractelement_vgpr_v16i8_idx13: 4942; GFX11: ; %bb.0: 4943; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4944; GFX11-NEXT: s_waitcnt_vscnt null, 
0x0 4945; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 4946; GFX11-NEXT: s_waitcnt vmcnt(0) 4947; GFX11-NEXT: v_bfe_u32 v0, v3, 8, 8 4948; GFX11-NEXT: v_bfe_u32 v1, v3, 16, 8 4949; GFX11-NEXT: v_lshrrev_b32_e32 v2, 24, v3 4950; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4951; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 4952; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4953; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4954; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v2 4955; GFX11-NEXT: v_and_or_b32 v0, 0xff, v3, v0 4956; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4957; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2 4958; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 4959; GFX11-NEXT: s_setpc_b64 s[30:31] 4960 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 4961 %element = extractelement <16 x i8> %vector, i32 13 4962 ret i8 %element 4963} 4964 4965define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) { 4966; GFX9-LABEL: extractelement_vgpr_v16i8_idx14: 4967; GFX9: ; %bb.0: 4968; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4969; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4970; GFX9-NEXT: s_waitcnt vmcnt(0) 4971; GFX9-NEXT: v_mov_b32_e32 v1, 8 4972; GFX9-NEXT: s_mov_b32 s4, 16 4973; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 4974; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v3 4975; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4976; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4977; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v1 4978; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 4979; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 4980; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4981; GFX9-NEXT: s_setpc_b64 s[30:31] 4982; 4983; GFX8-LABEL: extractelement_vgpr_v16i8_idx14: 4984; GFX8: ; %bb.0: 4985; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
4986; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4987; GFX8-NEXT: s_waitcnt vmcnt(0) 4988; GFX8-NEXT: v_mov_b32_e32 v0, 8 4989; GFX8-NEXT: v_mov_b32_e32 v1, 16 4990; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 4991; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 4992; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 4993; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4994; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4995; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 4996; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4997; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4998; GFX8-NEXT: s_setpc_b64 s[30:31] 4999; 5000; GFX7-LABEL: extractelement_vgpr_v16i8_idx14: 5001; GFX7: ; %bb.0: 5002; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5003; GFX7-NEXT: s_mov_b32 s6, 0 5004; GFX7-NEXT: s_mov_b32 s7, 0xf000 5005; GFX7-NEXT: s_mov_b64 s[4:5], 0 5006; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 5007; GFX7-NEXT: s_waitcnt vmcnt(0) 5008; GFX7-NEXT: v_bfe_u32 v2, v3, 8, 8 5009; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 5010; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 5011; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 5012; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 5013; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 5014; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 5015; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 5016; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 5017; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 5018; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5019; GFX7-NEXT: s_setpc_b64 s[30:31] 5020; 5021; GFX10-LABEL: extractelement_vgpr_v16i8_idx14: 5022; GFX10: ; %bb.0: 5023; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5024; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5025; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 5026; GFX10-NEXT: s_waitcnt vmcnt(0) 5027; GFX10-NEXT: v_mov_b32_e32 v0, 8 5028; GFX10-NEXT: s_mov_b32 s4, 16 5029; GFX10-NEXT: 
v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 5030; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v3 5031; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 5032; GFX10-NEXT: v_and_or_b32 v0, 0xff, v3, v0 5033; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 5034; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 5035; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5036; GFX10-NEXT: s_setpc_b64 s[30:31] 5037; 5038; GFX11-LABEL: extractelement_vgpr_v16i8_idx14: 5039; GFX11: ; %bb.0: 5040; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5041; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5042; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 5043; GFX11-NEXT: s_waitcnt vmcnt(0) 5044; GFX11-NEXT: v_bfe_u32 v0, v3, 8, 8 5045; GFX11-NEXT: v_bfe_u32 v1, v3, 16, 8 5046; GFX11-NEXT: v_lshrrev_b32_e32 v2, 24, v3 5047; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 5048; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 5049; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 5050; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 5051; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v2 5052; GFX11-NEXT: v_and_or_b32 v0, 0xff, v3, v0 5053; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5054; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2 5055; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5056; GFX11-NEXT: s_setpc_b64 s[30:31] 5057 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 5058 %element = extractelement <16 x i8> %vector, i32 14 5059 ret i8 %element 5060} 5061 5062define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) { 5063; GFX9-LABEL: extractelement_vgpr_v16i8_idx15: 5064; GFX9: ; %bb.0: 5065; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5066; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 5067; GFX9-NEXT: s_waitcnt vmcnt(0) 5068; GFX9-NEXT: v_mov_b32_e32 v1, 8 5069; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 
5070; GFX9-NEXT: v_mov_b32_e32 v2, 16 5071; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v3 5072; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 5073; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 5074; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v1 5075; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 5076; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 5077; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 5078; GFX9-NEXT: s_setpc_b64 s[30:31] 5079; 5080; GFX8-LABEL: extractelement_vgpr_v16i8_idx15: 5081; GFX8: ; %bb.0: 5082; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5083; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 5084; GFX8-NEXT: s_waitcnt vmcnt(0) 5085; GFX8-NEXT: v_mov_b32_e32 v0, 8 5086; GFX8-NEXT: v_mov_b32_e32 v1, 16 5087; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 5088; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 5089; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 5090; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 5091; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 5092; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 5093; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 5094; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 5095; GFX8-NEXT: s_setpc_b64 s[30:31] 5096; 5097; GFX7-LABEL: extractelement_vgpr_v16i8_idx15: 5098; GFX7: ; %bb.0: 5099; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5100; GFX7-NEXT: s_mov_b32 s6, 0 5101; GFX7-NEXT: s_mov_b32 s7, 0xf000 5102; GFX7-NEXT: s_mov_b64 s[4:5], 0 5103; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 5104; GFX7-NEXT: s_waitcnt vmcnt(0) 5105; GFX7-NEXT: v_bfe_u32 v2, v3, 8, 8 5106; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 5107; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 5108; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 5109; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 5110; 
GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 5111; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 5112; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 5113; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 5114; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 5115; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 5116; GFX7-NEXT: s_setpc_b64 s[30:31] 5117; 5118; GFX10-LABEL: extractelement_vgpr_v16i8_idx15: 5119; GFX10: ; %bb.0: 5120; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5121; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5122; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 5123; GFX10-NEXT: s_waitcnt vmcnt(0) 5124; GFX10-NEXT: v_mov_b32_e32 v0, 8 5125; GFX10-NEXT: v_mov_b32_e32 v1, 16 5126; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 5127; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v3 5128; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 5129; GFX10-NEXT: v_and_or_b32 v0, 0xff, v3, v0 5130; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 5131; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 5132; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 5133; GFX10-NEXT: s_setpc_b64 s[30:31] 5134; 5135; GFX11-LABEL: extractelement_vgpr_v16i8_idx15: 5136; GFX11: ; %bb.0: 5137; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5138; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5139; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 5140; GFX11-NEXT: s_waitcnt vmcnt(0) 5141; GFX11-NEXT: v_bfe_u32 v0, v3, 8, 8 5142; GFX11-NEXT: v_bfe_u32 v1, v3, 16, 8 5143; GFX11-NEXT: v_lshrrev_b32_e32 v2, 24, v3 5144; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 5145; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v0 5146; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 5147; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 5148; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v2 5149; GFX11-NEXT: v_and_or_b32 v0, 0xff, v3, v0 5150; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) 5151; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2 5152; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 5153; GFX11-NEXT: s_setpc_b64 s[30:31] 5154 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 5155 %element = extractelement <16 x i8> %vector, i32 15 5156 ret i8 %element 5157} 5158