; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s

; Each function below loads a <3 x i32> (96 bits) through an addrspace(3)
; pointer and returns it; the functions differ only in the alignment stated on
; the load (none, 1, 2, 4, 8, 16). The expected code is checked for gfx900,
; hawaii, tahiti and gfx1010. The check lines are generated; regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.

; No explicit alignment on the load. Expected code: a single ds_read_b96 on
; gfx900, hawaii and gfx1010; ds_read_b64 plus ds_read_b32 on tahiti.
define <3 x i32> @load_lds_v3i32(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b96 v[0:2], v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_b96 v[0:2], v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v2
; GFX6-NEXT:    ds_read_b64 v[0:1], v0
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_b96 v[0:2], v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr
  ret <3 x i32> %load
}

; align 1. Expected code on every target: twelve ds_read_u8 loads whose results
; are recombined with shifts and ORs.
define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32_align1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_u8 v1, v0
; GFX9-NEXT:    ds_read_u8 v2, v0 offset:1
; GFX9-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX9-NEXT:    ds_read_u8 v4, v0 offset:3
; GFX9-NEXT:    ds_read_u8 v5, v0 offset:4
; GFX9-NEXT:    ds_read_u8 v6, v0 offset:5
; GFX9-NEXT:    ds_read_u8 v7, v0 offset:6
; GFX9-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX9-NEXT:    ds_read_u8 v9, v0 offset:8
; GFX9-NEXT:    ds_read_u8 v10, v0 offset:9
; GFX9-NEXT:    ds_read_u8 v11, v0 offset:10
; GFX9-NEXT:    ds_read_u8 v12, v0 offset:11
; GFX9-NEXT:    s_waitcnt lgkmcnt(10)
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 8, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(8)
; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
; GFX9-NEXT:    v_lshl_or_b32 v1, v6, 8, v5
; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
; GFX9-NEXT:    v_lshl_or_b32 v2, v8, 8, v7
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
; GFX9-NEXT:    v_lshl_or_b32 v2, v10, 8, v9
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_lshl_or_b32 v3, v12, 8, v11
; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_u8 v1, v0 offset:6
; GFX7-NEXT:    ds_read_u8 v2, v0 offset:4
; GFX7-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX7-NEXT:    ds_read_u8 v4, v0 offset:1
; GFX7-NEXT:    ds_read_u8 v5, v0
; GFX7-NEXT:    ds_read_u8 v6, v0 offset:3
; GFX7-NEXT:    ds_read_u8 v7, v0 offset:5
; GFX7-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 8, v6
; GFX7-NEXT:    v_or_b32_e32 v3, v5, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v7
; GFX7-NEXT:    ds_read_u8 v5, v0 offset:11
; GFX7-NEXT:    ds_read_u8 v6, v0 offset:10
; GFX7-NEXT:    ds_read_u8 v7, v0 offset:9
; GFX7-NEXT:    ds_read_u8 v0, v0 offset:8
; GFX7-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v8
; GFX7-NEXT:    v_or_b32_e32 v1, v4, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v7
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v5
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v6
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 5, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 7, v0
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 6, v0
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 9, v0
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 8, v0
; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 11, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_u8 v1, v1
; GFX6-NEXT:    ds_read_u8 v2, v2
; GFX6-NEXT:    ds_read_u8 v3, v3
; GFX6-NEXT:    ds_read_u8 v4, v4
; GFX6-NEXT:    ds_read_u8 v5, v5
; GFX6-NEXT:    ds_read_u8 v6, v6
; GFX6-NEXT:    ds_read_u8 v7, v7
; GFX6-NEXT:    ds_read_u8 v8, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(7)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(6)
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(5)
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v3
; GFX6-NEXT:    s_waitcnt lgkmcnt(4)
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 10, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT:    ds_read_u8 v4, v4
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(4)
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v5
; GFX6-NEXT:    s_waitcnt lgkmcnt(3)
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v6
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 3, v0
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 2, v0
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; GFX6-NEXT:    ds_read_u8 v5, v5
; GFX6-NEXT:    ds_read_u8 v6, v6
; GFX6-NEXT:    ds_read_u8 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(5)
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v7
; GFX6-NEXT:    s_waitcnt lgkmcnt(3)
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_or_b32_e32 v2, v3, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(2)
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v5
; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v6
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v8
; GFX6-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_u8 v1, v0
; GFX10-NEXT:    ds_read_u8 v2, v0 offset:1
; GFX10-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX10-NEXT:    ds_read_u8 v4, v0 offset:3
; GFX10-NEXT:    ds_read_u8 v5, v0 offset:4
; GFX10-NEXT:    ds_read_u8 v6, v0 offset:5
; GFX10-NEXT:    ds_read_u8 v7, v0 offset:6
; GFX10-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX10-NEXT:    ds_read_u8 v9, v0 offset:8
; GFX10-NEXT:    ds_read_u8 v10, v0 offset:9
; GFX10-NEXT:    ds_read_u8 v11, v0 offset:10
; GFX10-NEXT:    ds_read_u8 v0, v0 offset:11
; GFX10-NEXT:    s_waitcnt lgkmcnt(10)
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 8, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(8)
; GFX10-NEXT:    v_lshl_or_b32 v2, v4, 8, v3
; GFX10-NEXT:    s_waitcnt lgkmcnt(6)
; GFX10-NEXT:    v_lshl_or_b32 v3, v6, 8, v5
; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
; GFX10-NEXT:    v_lshl_or_b32 v4, v8, 8, v7
; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
; GFX10-NEXT:    v_lshl_or_b32 v5, v10, 8, v9
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_lshl_or_b32 v6, v0, 8, v11
; GFX10-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX10-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 1
  ret <3 x i32> %load
}

; align 2. Expected code on every target: six ds_read_u16 loads whose results
; are recombined into three dwords.
define <3 x i32> @load_lds_v3i32_align2(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_u16 v1, v0
; GFX9-NEXT:    ds_read_u16 v2, v0 offset:2
; GFX9-NEXT:    ds_read_u16 v3, v0 offset:4
; GFX9-NEXT:    ds_read_u16 v4, v0 offset:6
; GFX9-NEXT:    ds_read_u16 v5, v0 offset:8
; GFX9-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_u16 v2, v0 offset:8
; GFX7-NEXT:    ds_read_u16 v1, v0 offset:4
; GFX7-NEXT:    ds_read_u16 v3, v0 offset:2
; GFX7-NEXT:    ds_read_u16 v4, v0
; GFX7-NEXT:    ds_read_u16 v5, v0 offset:6
; GFX7-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v3
; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
; GFX7-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v6
; GFX7-NEXT:    v_or_b32_e32 v2, v3, v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 6, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 10, v0
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 8, v0
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 2, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_u16 v1, v1
; GFX6-NEXT:    ds_read_u16 v2, v2
; GFX6-NEXT:    ds_read_u16 v3, v3
; GFX6-NEXT:    ds_read_u16 v4, v4
; GFX6-NEXT:    ds_read_u16 v5, v5
; GFX6-NEXT:    ds_read_u16 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(5)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(4)
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(3)
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_u16 v1, v0
; GFX10-NEXT:    ds_read_u16 v2, v0 offset:2
; GFX10-NEXT:    ds_read_u16 v3, v0 offset:4
; GFX10-NEXT:    ds_read_u16 v4, v0 offset:6
; GFX10-NEXT:    ds_read_u16 v5, v0 offset:8
; GFX10-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
; GFX10-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
; GFX10-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 2
  ret <3 x i32> %load
}

; align 4. Expected code: ds_read2_b32 plus ds_read_b32 on gfx900, hawaii and
; gfx1010; three separate ds_read_b32 on tahiti.
define <3 x i32> @load_lds_v3i32_align4(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, v0
; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX9-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, v0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX7-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    ds_read_b32 v0, v0
; GFX6-NEXT:    ds_read_b32 v1, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 4
  ret <3 x i32> %load
}

; align 8. The expected code is the same as for the align 4 case on every
; target.
define <3 x i32> @load_lds_v3i32_align8(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, v0
; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX9-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, v0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX7-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    ds_read_b32 v0, v0
; GFX6-NEXT:    ds_read_b32 v1, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align8:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 8
  ret <3 x i32> %load
}

; align 16. The expected code is the same as for the load with no explicit
; alignment: ds_read_b96 on gfx900, hawaii and gfx1010; ds_read_b64 plus
; ds_read_b32 on tahiti.
define <3 x i32> @load_lds_v3i32_align16(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32_align16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b96 v[0:2], v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_b96 v[0:2], v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v2
; GFX6-NEXT:    ds_read_b64 v[0:1], v0
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_b96 v[0:2], v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 16
  ret <3 x i32> %load
}