; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=SI
; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=GFX7
; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
; RUN: llc < %s -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030
; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1100

; RUN: llc < %s -global-isel -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
; RUN: llc < %s -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX7
; RUN: llc < %s -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
; RUN: llc < %s -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
; RUN: llc < %s -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100

; Code generation test for the f32 raw-buffer floating-point min/max atomic
; intrinsics. Every function below is compiled once per target listed in the
; run lines above, both with the default instruction selector and with
; -global-isel (the prefixes that start with G_).
;
; Three shapes are exercised for each of fmin and fmax
;   * noret    - the intrinsic result is unused; the expected ISA carries no glc
;   * rtn      - the result is stored; the expected ISA carries glc
;   * off4_slc - the trailing operands are (i32 4, i32 2); the expected ISA
;                shows "4 offen glc slc"
;
; The check lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

declare float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float, <4 x i32>, i32, i32, i32 immarg)
declare float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float, <4 x i32>, i32, i32, i32 immarg)


; fmin, kernel calling convention, result of the atomic is not used.
define amdgpu_kernel void @raw_buffer_atomic_min_noret_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_min_noret_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_min_noret_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_min_noret_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_min_noret_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x1
; GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_min_noret_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_clause 0x1
; GFX1100-NEXT:    s_load_b64 s[4:5], s[0:1], 0x34
; GFX1100-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_min_noret_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s4
; G_SI-NEXT:    v_mov_b32_e32 v1, s5
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_min_noret_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_min_noret_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s2
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s3
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_min_noret_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x1
; G_GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_min_noret_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x1
; G_GFX1100-NEXT:    s_load_b64 s[4:5], s[0:1], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
; G_GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; fmin, pixel-shader calling convention, result stored to an undef
; addrspace(1) pointer so the returning form of the atomic is required.
define amdgpu_ps void @raw_buffer_atomic_min_rtn_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_min_rtn_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_min_rtn_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_min_rtn_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; G_GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store float %ret, float addrspace(1)* undef
  ret void
}

; fmin with trailing operands (i32 4, i32 2); the result is stored through an
; addrspace(3) pointer, which the expected ISA implements with a DS write.
define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex, float addrspace(3)* %out) {
; SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xf
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s2
; SI-NEXT:    v_mov_b32_e32 v1, s3
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
; SI-NEXT:    v_mov_b32_e32 v1, s0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b32 v1, v0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT:    s_load_dword s0, s[0:1], 0xf
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b32 v1, v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x3c
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b32 v1, v0
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x2
; GFX1030-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX1030-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX1030-NEXT:    s_load_dword s0, s[0:1], 0x3c
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s2
; GFX1030-NEXT:    v_mov_b32_e32 v1, s3
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
; GFX1030-NEXT:    v_mov_b32_e32 v1, s0
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b32 v1, v0
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_clause 0x2
; GFX1100-NEXT:    s_load_b64 s[2:3], s[0:1], 0x34
; GFX1100-NEXT:    s_load_b128 s[4:7], s[0:1], 0x24
; GFX1100-NEXT:    s_load_b32 s0, s[0:1], 0x3c
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[4:7], 4 offen glc slc
; GFX1100-NEXT:    v_mov_b32_e32 v1, s0
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    ds_store_b32 v1, v0
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; G_SI-NEXT:    s_load_dword s0, s[0:1], 0xf
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s2
; G_SI-NEXT:    v_mov_b32_e32 v1, s3
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
; G_SI-NEXT:    v_mov_b32_e32 v1, s0
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b32 v1, v0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; G_GFX7-NEXT:    s_load_dword s0, s[0:1], 0xf
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s2
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s3
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b32 v1, v0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s2
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s3
; G_GFX10-NEXT:    s_load_dword s0, s[0:1], 0x3c
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b32 v1, v0
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x2
; G_GFX1030-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; G_GFX1030-NEXT:    s_load_dword s0, s[0:1], 0x3c
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s2
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s3
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b32 v1, v0
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x2
; G_GFX1100-NEXT:    s_load_b64 s[2:3], s[0:1], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[4:7], s[0:1], 0x24
; G_GFX1100-NEXT:    s_load_b32 s0, s[0:1], 0x3c
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[4:7], 4 offen glc slc
; G_GFX1100-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    ds_store_b32 v1, v0
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store float %ret, float addrspace(3)* %out, align 8
  ret void
}

; fmax, kernel calling convention, result of the atomic is not used.
define amdgpu_kernel void @raw_buffer_atomic_max_noret_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_max_noret_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_max_noret_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_max_noret_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_max_noret_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x1
; GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_max_noret_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_clause 0x1
; GFX1100-NEXT:    s_load_b64 s[4:5], s[0:1], 0x34
; GFX1100-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_max_noret_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s4
; G_SI-NEXT:    v_mov_b32_e32 v1, s5
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_max_noret_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_max_noret_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s2
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s3
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_max_noret_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x1
; G_GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_max_noret_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x1
; G_GFX1100-NEXT:    s_load_b64 s[4:5], s[0:1], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
; G_GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; fmax, pixel-shader calling convention, result stored to an undef
; addrspace(1) pointer so the returning form of the atomic is required.
define amdgpu_ps void @raw_buffer_atomic_max_rtn_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_max_rtn_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_max_rtn_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_max_rtn_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_max_rtn_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; G_GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store float %ret, float addrspace(1)* undef
  ret void
}

; fmax with trailing operands (i32 4, i32 2); the result is stored through the
; addrspace(1) kernel argument %out.
define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex, float addrspace(1)* %out) {
; SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xf
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s2
; SI-NEXT:    v_mov_b32_e32 v1, s3
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xf
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x3c
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x2
; GFX1030-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX1030-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX1030-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x3c
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s2
; GFX1030-NEXT:    v_mov_b32_e32 v1, s3
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
; GFX1030-NEXT:    v_mov_b32_e32 v1, 0
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_clause 0x2
; GFX1100-NEXT:    s_load_b64 s[2:3], s[0:1], 0x34
; GFX1100-NEXT:    s_load_b128 s[4:7], s[0:1], 0x24
; GFX1100-NEXT:    s_load_b64 s[0:1], s[0:1], 0x3c
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[4:7], 4 offen glc slc
; GFX1100-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; G_SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xf
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s2
; G_SI-NEXT:    v_mov_b32_e32 v1, s3
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; G_GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xf
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s2
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s3
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s2
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s3
; G_GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x3c
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
; G_GFX10-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; G_GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x2
; G_GFX1030-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; G_GFX1030-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x3c
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s2
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s3
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v1, v0, s[0:1]
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x2
; G_GFX1100-NEXT:    s_load_b64 s[2:3], s[0:1], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[4:7], s[0:1], 0x24
; G_GFX1100-NEXT:    s_load_b64 s[0:1], s[0:1], 0x3c
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[4:7], 4 offen glc slc
; G_GFX1100-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v1, v0, s[0:1]
; G_GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store float %ret, float addrspace(1)* %out, align 8
  ret void
}