; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s

; Each function loads a <3 x i32> (96 bits) through an addrspace(3) pointer with
; a different alignment and returns it in v[0:2]. The checks pin which DS
; instructions every tested subtarget selects for that alignment.
;
; Patterns visible in the expected output below:
;  * hawaii and tahiti set m0 to -1 before the first DS access; the gfx900,
;    gfx1010 and gfx1100 outputs contain no m0 write.
;  * the tahiti output never uses an "offset" modifier on a DS instruction;
;    every non-zero displacement is materialised with v_add_i32 instead.
;  * gfx1100 prints the DS loads with ds_load_* mnemonics rather than ds_read_*.

; No explicit alignment on the load. The expected code is the same as for the
; align 16 variant at the end of the file: a single 96-bit DS load everywhere
; except tahiti, which uses a 64-bit load plus a 32-bit load.
define <3 x i32> @load_lds_v3i32(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b96 v[0:2], v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_b96 v[0:2], v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v2
; GFX6-NEXT:    ds_read_b64 v[0:1], v0
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_b96 v[0:2], v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ds_load_b96 v[0:2], v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, ptr addrspace(3) %ptr
  ret <3 x i32> %load
}

; align 1: every target splits the access into twelve single-byte DS loads and
; rebuilds the three dwords. gfx900/gfx1010/gfx1100 merge with v_lshl_or_b32;
; hawaii and tahiti use separate v_lshlrev_b32 / v_or_b32 pairs.
define <3 x i32> @load_lds_v3i32_align1(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32_align1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_u8 v1, v0
; GFX9-NEXT:    ds_read_u8 v2, v0 offset:1
; GFX9-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX9-NEXT:    ds_read_u8 v4, v0 offset:3
; GFX9-NEXT:    ds_read_u8 v5, v0 offset:4
; GFX9-NEXT:    ds_read_u8 v6, v0 offset:5
; GFX9-NEXT:    ds_read_u8 v7, v0 offset:6
; GFX9-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX9-NEXT:    ds_read_u8 v9, v0 offset:8
; GFX9-NEXT:    ds_read_u8 v10, v0 offset:9
; GFX9-NEXT:    ds_read_u8 v11, v0 offset:10
; GFX9-NEXT:    ds_read_u8 v12, v0 offset:11
; GFX9-NEXT:    s_waitcnt lgkmcnt(10)
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 8, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(8)
; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
; GFX9-NEXT:    v_lshl_or_b32 v1, v6, 8, v5
; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
; GFX9-NEXT:    v_lshl_or_b32 v2, v8, 8, v7
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
; GFX9-NEXT:    v_lshl_or_b32 v2, v10, 8, v9
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_lshl_or_b32 v3, v12, 8, v11
; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_u8 v1, v0 offset:6
; GFX7-NEXT:    ds_read_u8 v2, v0 offset:4
; GFX7-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX7-NEXT:    ds_read_u8 v4, v0 offset:1
; GFX7-NEXT:    ds_read_u8 v5, v0
; GFX7-NEXT:    ds_read_u8 v6, v0 offset:3
; GFX7-NEXT:    ds_read_u8 v7, v0 offset:5
; GFX7-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 8, v6
; GFX7-NEXT:    v_or_b32_e32 v3, v5, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v7
; GFX7-NEXT:    ds_read_u8 v5, v0 offset:11
; GFX7-NEXT:    ds_read_u8 v6, v0 offset:10
; GFX7-NEXT:    ds_read_u8 v7, v0 offset:9
; GFX7-NEXT:    ds_read_u8 v0, v0 offset:8
; GFX7-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v8
; GFX7-NEXT:    v_or_b32_e32 v1, v4, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v7
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v5
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v6
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 5, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 7, v0
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 6, v0
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 9, v0
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 8, v0
; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 11, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_u8 v1, v1
; GFX6-NEXT:    ds_read_u8 v2, v2
; GFX6-NEXT:    ds_read_u8 v3, v3
; GFX6-NEXT:    ds_read_u8 v4, v4
; GFX6-NEXT:    ds_read_u8 v5, v5
; GFX6-NEXT:    ds_read_u8 v6, v6
; GFX6-NEXT:    ds_read_u8 v7, v7
; GFX6-NEXT:    ds_read_u8 v8, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(7)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(6)
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(5)
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v3
; GFX6-NEXT:    s_waitcnt lgkmcnt(4)
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 10, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT:    ds_read_u8 v4, v4
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(4)
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v5
; GFX6-NEXT:    s_waitcnt lgkmcnt(3)
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v6
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 3, v0
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 2, v0
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; GFX6-NEXT:    ds_read_u8 v5, v5
; GFX6-NEXT:    ds_read_u8 v6, v6
; GFX6-NEXT:    ds_read_u8 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(5)
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v7
; GFX6-NEXT:    s_waitcnt lgkmcnt(3)
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_or_b32_e32 v2, v3, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(2)
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v5
; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v6
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v8
; GFX6-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_u8 v1, v0
; GFX10-NEXT:    ds_read_u8 v2, v0 offset:1
; GFX10-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX10-NEXT:    ds_read_u8 v4, v0 offset:3
; GFX10-NEXT:    ds_read_u8 v5, v0 offset:4
; GFX10-NEXT:    ds_read_u8 v6, v0 offset:5
; GFX10-NEXT:    ds_read_u8 v7, v0 offset:6
; GFX10-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX10-NEXT:    ds_read_u8 v9, v0 offset:8
; GFX10-NEXT:    ds_read_u8 v10, v0 offset:9
; GFX10-NEXT:    ds_read_u8 v11, v0 offset:10
; GFX10-NEXT:    ds_read_u8 v0, v0 offset:11
; GFX10-NEXT:    s_waitcnt lgkmcnt(10)
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 8, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(8)
; GFX10-NEXT:    v_lshl_or_b32 v2, v4, 8, v3
; GFX10-NEXT:    s_waitcnt lgkmcnt(6)
; GFX10-NEXT:    v_lshl_or_b32 v3, v6, 8, v5
; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
; GFX10-NEXT:    v_lshl_or_b32 v4, v8, 8, v7
; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
; GFX10-NEXT:    v_lshl_or_b32 v5, v10, 8, v9
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_lshl_or_b32 v6, v0, 8, v11
; GFX10-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX10-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32_align1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ds_load_u8 v1, v0
; GFX11-NEXT:    ds_load_u8 v2, v0 offset:1
; GFX11-NEXT:    ds_load_u8 v3, v0 offset:2
; GFX11-NEXT:    ds_load_u8 v4, v0 offset:3
; GFX11-NEXT:    ds_load_u8 v5, v0 offset:4
; GFX11-NEXT:    ds_load_u8 v6, v0 offset:5
; GFX11-NEXT:    ds_load_u8 v7, v0 offset:6
; GFX11-NEXT:    ds_load_u8 v8, v0 offset:7
; GFX11-NEXT:    ds_load_u8 v9, v0 offset:8
; GFX11-NEXT:    ds_load_u8 v10, v0 offset:9
; GFX11-NEXT:    ds_load_u8 v11, v0 offset:10
; GFX11-NEXT:    ds_load_u8 v0, v0 offset:11
; GFX11-NEXT:    s_waitcnt lgkmcnt(10)
; GFX11-NEXT:    v_lshl_or_b32 v1, v2, 8, v1
; GFX11-NEXT:    s_waitcnt lgkmcnt(8)
; GFX11-NEXT:    v_lshl_or_b32 v2, v4, 8, v3
; GFX11-NEXT:    s_waitcnt lgkmcnt(6)
; GFX11-NEXT:    v_lshl_or_b32 v3, v6, 8, v5
; GFX11-NEXT:    s_waitcnt lgkmcnt(4)
; GFX11-NEXT:    v_lshl_or_b32 v4, v8, 8, v7
; GFX11-NEXT:    s_waitcnt lgkmcnt(2)
; GFX11-NEXT:    v_lshl_or_b32 v5, v10, 8, v9
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_lshl_or_b32 v6, v0, 8, v11
; GFX11-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX11-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, ptr addrspace(3) %ptr, align 1
  ret <3 x i32> %load
}

; align 2: every target issues six 16-bit DS loads and joins each pair of
; halves into one dword with a 16-bit shift and OR.
define <3 x i32> @load_lds_v3i32_align2(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_u16 v1, v0
; GFX9-NEXT:    ds_read_u16 v2, v0 offset:2
; GFX9-NEXT:    ds_read_u16 v3, v0 offset:4
; GFX9-NEXT:    ds_read_u16 v4, v0 offset:6
; GFX9-NEXT:    ds_read_u16 v5, v0 offset:8
; GFX9-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_u16 v2, v0 offset:8
; GFX7-NEXT:    ds_read_u16 v1, v0 offset:4
; GFX7-NEXT:    ds_read_u16 v3, v0 offset:2
; GFX7-NEXT:    ds_read_u16 v4, v0
; GFX7-NEXT:    ds_read_u16 v5, v0 offset:6
; GFX7-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v3
; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
; GFX7-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v6
; GFX7-NEXT:    v_or_b32_e32 v2, v3, v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 6, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 10, v0
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 8, v0
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 2, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_u16 v1, v1
; GFX6-NEXT:    ds_read_u16 v2, v2
; GFX6-NEXT:    ds_read_u16 v3, v3
; GFX6-NEXT:    ds_read_u16 v4, v4
; GFX6-NEXT:    ds_read_u16 v5, v5
; GFX6-NEXT:    ds_read_u16 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(5)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(4)
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(3)
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_u16 v1, v0
; GFX10-NEXT:    ds_read_u16 v2, v0 offset:2
; GFX10-NEXT:    ds_read_u16 v3, v0 offset:4
; GFX10-NEXT:    ds_read_u16 v4, v0 offset:6
; GFX10-NEXT:    ds_read_u16 v5, v0 offset:8
; GFX10-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
; GFX10-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
; GFX10-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32_align2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ds_load_u16 v1, v0
; GFX11-NEXT:    ds_load_u16 v2, v0 offset:2
; GFX11-NEXT:    ds_load_u16 v3, v0 offset:4
; GFX11-NEXT:    ds_load_u16 v4, v0 offset:6
; GFX11-NEXT:    ds_load_u16 v5, v0 offset:8
; GFX11-NEXT:    ds_load_u16 v6, v0 offset:10
; GFX11-NEXT:    s_waitcnt lgkmcnt(4)
; GFX11-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX11-NEXT:    s_waitcnt lgkmcnt(2)
; GFX11-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, ptr addrspace(3) %ptr, align 2
  ret <3 x i32> %load
}

; align 4: gfx900, hawaii, gfx1010 and gfx1100 use one two-address 32-bit DS
; load for dwords 0-1 plus a 32-bit load at offset 8; tahiti uses three
; separate 32-bit loads.
define <3 x i32> @load_lds_v3i32_align4(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, v0
; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX9-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, v0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX7-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    ds_read_b32 v0, v0
; GFX6-NEXT:    ds_read_b32 v1, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32_align4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_mov_b32_e32 v2, v0
; GFX11-NEXT:    ds_load_2addr_b32 v[0:1], v0 offset1:1
; GFX11-NEXT:    ds_load_b32 v2, v2 offset:8
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, ptr addrspace(3) %ptr, align 4
  ret <3 x i32> %load
}

; align 8: every target uses one 64-bit DS load for dwords 0-1 followed by a
; 32-bit load for dword 2.
define <3 x i32> @load_lds_v3i32_align8(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, v0
; GFX9-NEXT:    ds_read_b64 v[0:1], v0
; GFX9-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, v0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_b64 v[0:1], v0
; GFX7-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v2
; GFX6-NEXT:    ds_read_b64 v[0:1], v0
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align8:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    ds_read_b64 v[0:1], v0
; GFX10-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32_align8:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_mov_b32_e32 v2, v0
; GFX11-NEXT:    ds_load_b64 v[0:1], v0
; GFX11-NEXT:    ds_load_b32 v2, v2 offset:8
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, ptr addrspace(3) %ptr, align 8
  ret <3 x i32> %load
}

; align 16: gfx900, hawaii, gfx1010 and gfx1100 load all three dwords with a
; single 96-bit DS load; tahiti still uses a 64-bit load plus a 32-bit load.
define <3 x i32> @load_lds_v3i32_align16(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32_align16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b96 v[0:2], v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_b96 v[0:2], v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v2
; GFX6-NEXT:    ds_read_b64 v[0:1], v0
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_b96 v[0:2], v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32_align16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ds_load_b96 v[0:2], v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, ptr addrspace(3) %ptr, align 16
  ret <3 x i32> %load
}