; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Plain 1D sample with dmask 15. The expected image_sample writes four VGPRs
; and carries no tfe/lwe modifier; the gfx1010/gfx1100 checks also expect
; dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample returning {<4 x float>, i32}; the i32 member is stored to %out.
; The checks expect the tfe modifier, a five-register result (v[0:4]), and
; those result registers being zeroed before the sample is issued.
define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[14:15], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v5, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v3, v0
; VERDE-NEXT:    v_mov_b32_e32 v4, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[14:15]
; VERDE-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; VERDE-NEXT:    s_mov_b32 s15, 0xf000
; VERDE-NEXT:    s_mov_b32 s14, -1
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    buffer_store_dword v4, off, s[12:15], 0
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[14:15], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v6, 0
; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
; GFX6789-NEXT:    v_mov_b32_e32 v7, v6
; GFX6789-NEXT:    v_mov_b32_e32 v8, v6
; GFX6789-NEXT:    v_mov_b32_e32 v9, v6
; GFX6789-NEXT:    v_mov_b32_e32 v10, v6
; GFX6789-NEXT:    v_mov_b32_e32 v0, v6
; GFX6789-NEXT:    v_mov_b32_e32 v1, v7
; GFX6789-NEXT:    v_mov_b32_e32 v2, v8
; GFX6789-NEXT:    v_mov_b32_e32 v3, v9
; GFX6789-NEXT:    v_mov_b32_e32 v4, v10
; GFX6789-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6789-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    global_store_dword v6, v4, s[12:13]
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_mov_b32_e32 v6, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v7, v6
; GFX10-NEXT:    v_mov_b32_e32 v8, v6
; GFX10-NEXT:    v_mov_b32_e32 v9, v6
; GFX10-NEXT:    v_mov_b32_e32 v10, v6
; GFX10-NEXT:    v_mov_b32_e32 v0, v6
; GFX10-NEXT:    v_mov_b32_e32 v1, v7
; GFX10-NEXT:    v_mov_b32_e32 v2, v8
; GFX10-NEXT:    v_mov_b32_e32 v3, v9
; GFX10-NEXT:    v_mov_b32_e32 v4, v10
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v6, v4, s[12:13]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_tfe:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s14, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
; GFX11-NEXT:    v_mov_b32_e32 v9, v6
; GFX11-NEXT:    v_mov_b32_e32 v10, v6
; GFX11-NEXT:    v_mov_b32_e32 v8, v6
; GFX11-NEXT:    v_mov_b32_e32 v7, v6
; GFX11-NEXT:    v_mov_b32_e32 v0, v6
; GFX11-NEXT:    v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v1, v7
; GFX11-NEXT:    v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v10
; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX11-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v6, v4, s[12:13]
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; TFE sample where only vector element 0 and the i32 member are used. The
; checks expect dmask narrowed to 0x1 and a two-register result (v[0:1]).
; %out is declared but not referenced by the body.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 0
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}

; TFE sample where only vector element 1 and the i32 member are used. The
; checks expect dmask narrowed to 0x2 and a two-register result (v[0:1]).
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 1
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}

; TFE sample where only vector element 2 and the i32 member are used. The
; checks expect dmask narrowed to 0x4 and a two-register result (v[0:1]).
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 2
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}

; TFE sample where only vector element 3 and the i32 member are used. The
; checks expect dmask narrowed to 0x8 and a two-register result (v[0:1]).
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}

; TFE sample where vector elements 0 and 1 plus the i32 member are used. The
; checks expect dmask 0x3 and a three-register result (v[0:2]).
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v3, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_mov_b32_e32 v3, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, v0
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s12, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
; GFX11-NEXT:    v_mov_b32_e32 v1, v0
; GFX11-NEXT:    v_mov_b32_e32 v2, v0
; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 0
  %res.f2 = extractelement <4 x float> %res.vec, i32 1
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
  ret <4 x float> %res
}

; TFE sample where vector elements 1 and 3 plus the i32 member are used. The
; checks expect dmask 0xa and a three-register result (v[0:2]).
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v3, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_mov_b32_e32 v3, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, v0
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s12, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
; GFX11-NEXT:    v_mov_b32_e32 v1, v0
; GFX11-NEXT:    v_mov_b32_e32 v2, v0
; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 1
  %res.f2 = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
  ret <4 x float> %res
}

; TFE sample where vector elements 0, 2 and 3 plus the i32 member are used.
; The checks expect dmask 0xd and a four-register result (v[0:3]).
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v4, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v3, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v4, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 0
  %res.f2 = extractelement <4 x float> %res.vec, i32 2
  %res.f3 = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res.tmp3 = insertelement <4 x float> %res.tmp2, float %res.f3, i32 2
  %res = insertelement <4 x float> %res.tmp3, float %res.errf, i32 3
  ret <4 x float> %res
}

; Same shape as sample_1d_tfe, but the second-to-last i32 operand of the call
; is 2 and the checks expect the lwe modifier instead of tfe.
define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_lwe:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[14:15], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v5, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v3, v0
; VERDE-NEXT:    v_mov_b32_e32 v4, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[14:15]
; VERDE-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
; VERDE-NEXT:    s_mov_b32 s15, 0xf000
; VERDE-NEXT:    s_mov_b32 s14, -1
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    buffer_store_dword v4, off, s[12:15], 0
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_lwe:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[14:15], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v6, 0
; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
; GFX6789-NEXT:    v_mov_b32_e32 v7, v6
; GFX6789-NEXT:    v_mov_b32_e32 v8, v6
; GFX6789-NEXT:    v_mov_b32_e32 v9, v6
; GFX6789-NEXT:    v_mov_b32_e32 v10, v6
; GFX6789-NEXT:    v_mov_b32_e32 v0, v6
; GFX6789-NEXT:    v_mov_b32_e32 v1, v7
; GFX6789-NEXT:    v_mov_b32_e32 v2, v8
; GFX6789-NEXT:    v_mov_b32_e32 v3, v9
; GFX6789-NEXT:    v_mov_b32_e32 v4, v10
; GFX6789-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6789-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    global_store_dword v6, v4, s[12:13]
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_lwe:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_mov_b32_e32 v6, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v7, v6
; GFX10-NEXT:    v_mov_b32_e32 v8, v6
; GFX10-NEXT:    v_mov_b32_e32 v9, v6
; GFX10-NEXT:    v_mov_b32_e32 v10, v6
; GFX10-NEXT:    v_mov_b32_e32 v0, v6
; GFX10-NEXT:    v_mov_b32_e32 v1, v7
; GFX10-NEXT:    v_mov_b32_e32 v2, v8
; GFX10-NEXT:    v_mov_b32_e32 v3, v9
; GFX10-NEXT:    v_mov_b32_e32 v4, v10
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v6, v4, s[12:13]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_lwe:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s14, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
; GFX11-NEXT:    v_mov_b32_e32 v9, v6
; GFX11-NEXT:    v_mov_b32_e32 v10, v6
; GFX11-NEXT:    v_mov_b32_e32 v8, v6
; GFX11-NEXT:    v_mov_b32_e32 v7, v6
; GFX11-NEXT:    v_mov_b32_e32 v0, v6
; GFX11-NEXT:    v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v1, v7
; GFX11-NEXT:    v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v10
; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX11-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v6, v4, s[12:13]
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; 2D sample: two coordinates (%s, %t) are expected in v[0:1]; the
; gfx1010/gfx1100 checks expect dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; VERDE-LABEL: sample_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 3D sample: three coordinates (%s, %t, %r) are expected in v[0:2]; the
; gfx1010/gfx1100 checks expect dim:SQ_RSRC_IMG_3D.
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
; VERDE-LABEL: sample_3d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_3d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_3d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Cube sample: (%s, %t, %face) are expected in v[0:2]. The verde/gfx900 checks
; expect the da modifier; the gfx1010/gfx1100 checks expect
; dim:SQ_RSRC_IMG_CUBE instead.
define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; VERDE-LABEL: sample_cube:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_cube:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_cube:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
; VERDE-LABEL: sample_1darray:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT:    s_waitcnt vmcnt(0)
;
VERDE-NEXT: ; return to shader part epilog 697; 698; GFX6789-LABEL: sample_1darray: 699; GFX6789: ; %bb.0: ; %main_body 700; GFX6789-NEXT: s_mov_b64 s[12:13], exec 701; GFX6789-NEXT: s_wqm_b64 exec, exec 702; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 703; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da 704; GFX6789-NEXT: s_waitcnt vmcnt(0) 705; GFX6789-NEXT: ; return to shader part epilog 706; 707; GFX10PLUS-LABEL: sample_1darray: 708; GFX10PLUS: ; %bb.0: ; %main_body 709; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 710; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 711; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 712; GFX10PLUS-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY 713; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 714; GFX10PLUS-NEXT: ; return to shader part epilog 715main_body: 716 %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 717 ret <4 x float> %v 718} 719 720define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) { 721; VERDE-LABEL: sample_2darray: 722; VERDE: ; %bb.0: ; %main_body 723; VERDE-NEXT: s_mov_b64 s[12:13], exec 724; VERDE-NEXT: s_wqm_b64 exec, exec 725; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 726; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da 727; VERDE-NEXT: s_waitcnt vmcnt(0) 728; VERDE-NEXT: ; return to shader part epilog 729; 730; GFX6789-LABEL: sample_2darray: 731; GFX6789: ; %bb.0: ; %main_body 732; GFX6789-NEXT: s_mov_b64 s[12:13], exec 733; GFX6789-NEXT: s_wqm_b64 exec, exec 734; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 735; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da 736; GFX6789-NEXT: s_waitcnt vmcnt(0) 737; GFX6789-NEXT: ; return to shader part epilog 738; 739; GFX10PLUS-LABEL: sample_2darray: 740; GFX10PLUS: ; %bb.0: ; %main_body 741; 
GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 742; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 743; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 744; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY 745; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 746; GFX10PLUS-NEXT: ; return to shader part epilog 747main_body: 748 %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 749 ret <4 x float> %v 750} 751 752define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { 753; VERDE-LABEL: sample_c_1d: 754; VERDE: ; %bb.0: ; %main_body 755; VERDE-NEXT: s_mov_b64 s[12:13], exec 756; VERDE-NEXT: s_wqm_b64 exec, exec 757; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 758; VERDE-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 759; VERDE-NEXT: s_waitcnt vmcnt(0) 760; VERDE-NEXT: ; return to shader part epilog 761; 762; GFX6789-LABEL: sample_c_1d: 763; GFX6789: ; %bb.0: ; %main_body 764; GFX6789-NEXT: s_mov_b64 s[12:13], exec 765; GFX6789-NEXT: s_wqm_b64 exec, exec 766; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 767; GFX6789-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 768; GFX6789-NEXT: s_waitcnt vmcnt(0) 769; GFX6789-NEXT: ; return to shader part epilog 770; 771; GFX10PLUS-LABEL: sample_c_1d: 772; GFX10PLUS: ; %bb.0: ; %main_body 773; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 774; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 775; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 776; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 777; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 778; GFX10PLUS-NEXT: ; return to shader part epilog 779main_body: 780 %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 781 ret <4 x float> %v 782} 
783 784define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { 785; VERDE-LABEL: sample_c_2d: 786; VERDE: ; %bb.0: ; %main_body 787; VERDE-NEXT: s_mov_b64 s[12:13], exec 788; VERDE-NEXT: s_wqm_b64 exec, exec 789; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 790; VERDE-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 791; VERDE-NEXT: s_waitcnt vmcnt(0) 792; VERDE-NEXT: ; return to shader part epilog 793; 794; GFX6789-LABEL: sample_c_2d: 795; GFX6789: ; %bb.0: ; %main_body 796; GFX6789-NEXT: s_mov_b64 s[12:13], exec 797; GFX6789-NEXT: s_wqm_b64 exec, exec 798; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 799; GFX6789-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 800; GFX6789-NEXT: s_waitcnt vmcnt(0) 801; GFX6789-NEXT: ; return to shader part epilog 802; 803; GFX10PLUS-LABEL: sample_c_2d: 804; GFX10PLUS: ; %bb.0: ; %main_body 805; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 806; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 807; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 808; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 809; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 810; GFX10PLUS-NEXT: ; return to shader part epilog 811main_body: 812 %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 813 ret <4 x float> %v 814} 815 816define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) { 817; VERDE-LABEL: sample_cl_1d: 818; VERDE: ; %bb.0: ; %main_body 819; VERDE-NEXT: s_mov_b64 s[12:13], exec 820; VERDE-NEXT: s_wqm_b64 exec, exec 821; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 822; VERDE-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 823; VERDE-NEXT: s_waitcnt vmcnt(0) 824; VERDE-NEXT: ; return to shader part epilog 825; 826; GFX6789-LABEL: sample_cl_1d: 827; 
GFX6789: ; %bb.0: ; %main_body 828; GFX6789-NEXT: s_mov_b64 s[12:13], exec 829; GFX6789-NEXT: s_wqm_b64 exec, exec 830; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 831; GFX6789-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 832; GFX6789-NEXT: s_waitcnt vmcnt(0) 833; GFX6789-NEXT: ; return to shader part epilog 834; 835; GFX10PLUS-LABEL: sample_cl_1d: 836; GFX10PLUS: ; %bb.0: ; %main_body 837; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 838; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 839; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 840; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 841; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 842; GFX10PLUS-NEXT: ; return to shader part epilog 843main_body: 844 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 845 ret <4 x float> %v 846} 847 848define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { 849; VERDE-LABEL: sample_cl_2d: 850; VERDE: ; %bb.0: ; %main_body 851; VERDE-NEXT: s_mov_b64 s[12:13], exec 852; VERDE-NEXT: s_wqm_b64 exec, exec 853; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 854; VERDE-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 855; VERDE-NEXT: s_waitcnt vmcnt(0) 856; VERDE-NEXT: ; return to shader part epilog 857; 858; GFX6789-LABEL: sample_cl_2d: 859; GFX6789: ; %bb.0: ; %main_body 860; GFX6789-NEXT: s_mov_b64 s[12:13], exec 861; GFX6789-NEXT: s_wqm_b64 exec, exec 862; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 863; GFX6789-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 864; GFX6789-NEXT: s_waitcnt vmcnt(0) 865; GFX6789-NEXT: ; return to shader part epilog 866; 867; GFX10PLUS-LABEL: sample_cl_2d: 868; GFX10PLUS: ; %bb.0: ; %main_body 869; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 870; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 871; GFX10PLUS-NEXT: 
s_and_b32 exec_lo, exec_lo, s12 872; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 873; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 874; GFX10PLUS-NEXT: ; return to shader part epilog 875main_body: 876 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 877 ret <4 x float> %v 878} 879 880define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) { 881; VERDE-LABEL: sample_c_cl_1d: 882; VERDE: ; %bb.0: ; %main_body 883; VERDE-NEXT: s_mov_b64 s[12:13], exec 884; VERDE-NEXT: s_wqm_b64 exec, exec 885; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 886; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 887; VERDE-NEXT: s_waitcnt vmcnt(0) 888; VERDE-NEXT: ; return to shader part epilog 889; 890; GFX6789-LABEL: sample_c_cl_1d: 891; GFX6789: ; %bb.0: ; %main_body 892; GFX6789-NEXT: s_mov_b64 s[12:13], exec 893; GFX6789-NEXT: s_wqm_b64 exec, exec 894; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 895; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 896; GFX6789-NEXT: s_waitcnt vmcnt(0) 897; GFX6789-NEXT: ; return to shader part epilog 898; 899; GFX10PLUS-LABEL: sample_c_cl_1d: 900; GFX10PLUS: ; %bb.0: ; %main_body 901; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 902; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 903; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 904; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 905; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 906; GFX10PLUS-NEXT: ; return to shader part epilog 907main_body: 908 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 909 ret <4 x float> %v 910} 911 912define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> 
inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { 913; VERDE-LABEL: sample_c_cl_2d: 914; VERDE: ; %bb.0: ; %main_body 915; VERDE-NEXT: s_mov_b64 s[12:13], exec 916; VERDE-NEXT: s_wqm_b64 exec, exec 917; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 918; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 919; VERDE-NEXT: s_waitcnt vmcnt(0) 920; VERDE-NEXT: ; return to shader part epilog 921; 922; GFX6789-LABEL: sample_c_cl_2d: 923; GFX6789: ; %bb.0: ; %main_body 924; GFX6789-NEXT: s_mov_b64 s[12:13], exec 925; GFX6789-NEXT: s_wqm_b64 exec, exec 926; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 927; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 928; GFX6789-NEXT: s_waitcnt vmcnt(0) 929; GFX6789-NEXT: ; return to shader part epilog 930; 931; GFX10PLUS-LABEL: sample_c_cl_2d: 932; GFX10PLUS: ; %bb.0: ; %main_body 933; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 934; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 935; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 936; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 937; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 938; GFX10PLUS-NEXT: ; return to shader part epilog 939main_body: 940 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 941 ret <4 x float> %v 942} 943 944define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { 945; VERDE-LABEL: sample_b_1d: 946; VERDE: ; %bb.0: ; %main_body 947; VERDE-NEXT: s_mov_b64 s[12:13], exec 948; VERDE-NEXT: s_wqm_b64 exec, exec 949; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 950; VERDE-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 951; VERDE-NEXT: s_waitcnt vmcnt(0) 952; VERDE-NEXT: ; return to shader part epilog 953; 954; GFX6789-LABEL: sample_b_1d: 955; GFX6789: ; %bb.0: 
; %main_body 956; GFX6789-NEXT: s_mov_b64 s[12:13], exec 957; GFX6789-NEXT: s_wqm_b64 exec, exec 958; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 959; GFX6789-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 960; GFX6789-NEXT: s_waitcnt vmcnt(0) 961; GFX6789-NEXT: ; return to shader part epilog 962; 963; GFX10PLUS-LABEL: sample_b_1d: 964; GFX10PLUS: ; %bb.0: ; %main_body 965; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 966; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 967; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 968; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 969; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 970; GFX10PLUS-NEXT: ; return to shader part epilog 971main_body: 972 %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 973 ret <4 x float> %v 974} 975 976define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { 977; VERDE-LABEL: sample_b_2d: 978; VERDE: ; %bb.0: ; %main_body 979; VERDE-NEXT: s_mov_b64 s[12:13], exec 980; VERDE-NEXT: s_wqm_b64 exec, exec 981; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 982; VERDE-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 983; VERDE-NEXT: s_waitcnt vmcnt(0) 984; VERDE-NEXT: ; return to shader part epilog 985; 986; GFX6789-LABEL: sample_b_2d: 987; GFX6789: ; %bb.0: ; %main_body 988; GFX6789-NEXT: s_mov_b64 s[12:13], exec 989; GFX6789-NEXT: s_wqm_b64 exec, exec 990; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 991; GFX6789-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 992; GFX6789-NEXT: s_waitcnt vmcnt(0) 993; GFX6789-NEXT: ; return to shader part epilog 994; 995; GFX10PLUS-LABEL: sample_b_2d: 996; GFX10PLUS: ; %bb.0: ; %main_body 997; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 998; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 999; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 
1000; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1001; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1002; GFX10PLUS-NEXT: ; return to shader part epilog 1003main_body: 1004 %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1005 ret <4 x float> %v 1006} 1007 1008define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) { 1009; VERDE-LABEL: sample_c_b_1d: 1010; VERDE: ; %bb.0: ; %main_body 1011; VERDE-NEXT: s_mov_b64 s[12:13], exec 1012; VERDE-NEXT: s_wqm_b64 exec, exec 1013; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1014; VERDE-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1015; VERDE-NEXT: s_waitcnt vmcnt(0) 1016; VERDE-NEXT: ; return to shader part epilog 1017; 1018; GFX6789-LABEL: sample_c_b_1d: 1019; GFX6789: ; %bb.0: ; %main_body 1020; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1021; GFX6789-NEXT: s_wqm_b64 exec, exec 1022; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1023; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1024; GFX6789-NEXT: s_waitcnt vmcnt(0) 1025; GFX6789-NEXT: ; return to shader part epilog 1026; 1027; GFX10PLUS-LABEL: sample_c_b_1d: 1028; GFX10PLUS: ; %bb.0: ; %main_body 1029; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1030; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1031; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1032; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1033; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1034; GFX10PLUS-NEXT: ; return to shader part epilog 1035main_body: 1036 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1037 ret <4 x float> %v 1038} 1039 1040define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x 
i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { 1041; VERDE-LABEL: sample_c_b_2d: 1042; VERDE: ; %bb.0: ; %main_body 1043; VERDE-NEXT: s_mov_b64 s[12:13], exec 1044; VERDE-NEXT: s_wqm_b64 exec, exec 1045; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1046; VERDE-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1047; VERDE-NEXT: s_waitcnt vmcnt(0) 1048; VERDE-NEXT: ; return to shader part epilog 1049; 1050; GFX6789-LABEL: sample_c_b_2d: 1051; GFX6789: ; %bb.0: ; %main_body 1052; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1053; GFX6789-NEXT: s_wqm_b64 exec, exec 1054; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1055; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1056; GFX6789-NEXT: s_waitcnt vmcnt(0) 1057; GFX6789-NEXT: ; return to shader part epilog 1058; 1059; GFX10PLUS-LABEL: sample_c_b_2d: 1060; GFX10PLUS: ; %bb.0: ; %main_body 1061; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1062; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1063; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1064; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1065; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1066; GFX10PLUS-NEXT: ; return to shader part epilog 1067main_body: 1068 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1069 ret <4 x float> %v 1070} 1071 1072define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) { 1073; VERDE-LABEL: sample_b_cl_1d: 1074; VERDE: ; %bb.0: ; %main_body 1075; VERDE-NEXT: s_mov_b64 s[12:13], exec 1076; VERDE-NEXT: s_wqm_b64 exec, exec 1077; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1078; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1079; VERDE-NEXT: s_waitcnt vmcnt(0) 1080; VERDE-NEXT: ; return to shader part 
epilog 1081; 1082; GFX6789-LABEL: sample_b_cl_1d: 1083; GFX6789: ; %bb.0: ; %main_body 1084; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1085; GFX6789-NEXT: s_wqm_b64 exec, exec 1086; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1087; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1088; GFX6789-NEXT: s_waitcnt vmcnt(0) 1089; GFX6789-NEXT: ; return to shader part epilog 1090; 1091; GFX10PLUS-LABEL: sample_b_cl_1d: 1092; GFX10PLUS: ; %bb.0: ; %main_body 1093; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1094; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1095; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1096; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1097; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1098; GFX10PLUS-NEXT: ; return to shader part epilog 1099main_body: 1100 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1101 ret <4 x float> %v 1102} 1103 1104define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { 1105; VERDE-LABEL: sample_b_cl_2d: 1106; VERDE: ; %bb.0: ; %main_body 1107; VERDE-NEXT: s_mov_b64 s[12:13], exec 1108; VERDE-NEXT: s_wqm_b64 exec, exec 1109; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1110; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1111; VERDE-NEXT: s_waitcnt vmcnt(0) 1112; VERDE-NEXT: ; return to shader part epilog 1113; 1114; GFX6789-LABEL: sample_b_cl_2d: 1115; GFX6789: ; %bb.0: ; %main_body 1116; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1117; GFX6789-NEXT: s_wqm_b64 exec, exec 1118; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1119; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1120; GFX6789-NEXT: s_waitcnt vmcnt(0) 1121; GFX6789-NEXT: ; return to shader part epilog 1122; 1123; GFX10PLUS-LABEL: sample_b_cl_2d: 1124; 
GFX10PLUS: ; %bb.0: ; %main_body 1125; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1126; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1127; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1128; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1129; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1130; GFX10PLUS-NEXT: ; return to shader part epilog 1131main_body: 1132 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1133 ret <4 x float> %v 1134} 1135 1136define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) { 1137; VERDE-LABEL: sample_c_b_cl_1d: 1138; VERDE: ; %bb.0: ; %main_body 1139; VERDE-NEXT: s_mov_b64 s[12:13], exec 1140; VERDE-NEXT: s_wqm_b64 exec, exec 1141; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1142; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1143; VERDE-NEXT: s_waitcnt vmcnt(0) 1144; VERDE-NEXT: ; return to shader part epilog 1145; 1146; GFX6789-LABEL: sample_c_b_cl_1d: 1147; GFX6789: ; %bb.0: ; %main_body 1148; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1149; GFX6789-NEXT: s_wqm_b64 exec, exec 1150; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1151; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1152; GFX6789-NEXT: s_waitcnt vmcnt(0) 1153; GFX6789-NEXT: ; return to shader part epilog 1154; 1155; GFX10PLUS-LABEL: sample_c_b_cl_1d: 1156; GFX10PLUS: ; %bb.0: ; %main_body 1157; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1158; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1159; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1160; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1161; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1162; GFX10PLUS-NEXT: ; return to shader part epilog 1163main_body: 1164 %v = call <4 x float> 
@llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1165 ret <4 x float> %v 1166} 1167 1168define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { 1169; VERDE-LABEL: sample_c_b_cl_2d: 1170; VERDE: ; %bb.0: ; %main_body 1171; VERDE-NEXT: s_mov_b64 s[12:13], exec 1172; VERDE-NEXT: s_wqm_b64 exec, exec 1173; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1174; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1175; VERDE-NEXT: s_waitcnt vmcnt(0) 1176; VERDE-NEXT: ; return to shader part epilog 1177; 1178; GFX6789-LABEL: sample_c_b_cl_2d: 1179; GFX6789: ; %bb.0: ; %main_body 1180; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1181; GFX6789-NEXT: s_wqm_b64 exec, exec 1182; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1183; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1184; GFX6789-NEXT: s_waitcnt vmcnt(0) 1185; GFX6789-NEXT: ; return to shader part epilog 1186; 1187; GFX10PLUS-LABEL: sample_c_b_cl_2d: 1188; GFX10PLUS: ; %bb.0: ; %main_body 1189; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1190; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1191; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1192; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1193; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1194; GFX10PLUS-NEXT: ; return to shader part epilog 1195main_body: 1196 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1197 ret <4 x float> %v 1198} 1199 1200define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { 1201; VERDE-LABEL: sample_d_1d: 1202; VERDE: ; %bb.0: ; 
%main_body 1203; VERDE-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1204; VERDE-NEXT: s_waitcnt vmcnt(0) 1205; VERDE-NEXT: ; return to shader part epilog 1206; 1207; GFX6789-LABEL: sample_d_1d: 1208; GFX6789: ; %bb.0: ; %main_body 1209; GFX6789-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1210; GFX6789-NEXT: s_waitcnt vmcnt(0) 1211; GFX6789-NEXT: ; return to shader part epilog 1212; 1213; GFX10PLUS-LABEL: sample_d_1d: 1214; GFX10PLUS: ; %bb.0: ; %main_body 1215; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1216; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1217; GFX10PLUS-NEXT: ; return to shader part epilog 1218main_body: 1219 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1220 ret <4 x float> %v 1221} 1222 1223define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { 1224; VERDE-LABEL: sample_d_2d: 1225; VERDE: ; %bb.0: ; %main_body 1226; VERDE-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf 1227; VERDE-NEXT: s_waitcnt vmcnt(0) 1228; VERDE-NEXT: ; return to shader part epilog 1229; 1230; GFX6789-LABEL: sample_d_2d: 1231; GFX6789: ; %bb.0: ; %main_body 1232; GFX6789-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf 1233; GFX6789-NEXT: s_waitcnt vmcnt(0) 1234; GFX6789-NEXT: ; return to shader part epilog 1235; 1236; GFX10PLUS-LABEL: sample_d_2d: 1237; GFX10PLUS: ; %bb.0: ; %main_body 1238; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1239; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1240; GFX10PLUS-NEXT: ; return to shader part epilog 1241main_body: 1242 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> 
%rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1243 ret <4 x float> %v 1244} 1245 1246define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { 1247; VERDE-LABEL: sample_c_d_1d: 1248; VERDE: ; %bb.0: ; %main_body 1249; VERDE-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1250; VERDE-NEXT: s_waitcnt vmcnt(0) 1251; VERDE-NEXT: ; return to shader part epilog 1252; 1253; GFX6789-LABEL: sample_c_d_1d: 1254; GFX6789: ; %bb.0: ; %main_body 1255; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1256; GFX6789-NEXT: s_waitcnt vmcnt(0) 1257; GFX6789-NEXT: ; return to shader part epilog 1258; 1259; GFX10PLUS-LABEL: sample_c_d_1d: 1260; GFX10PLUS: ; %bb.0: ; %main_body 1261; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1262; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1263; GFX10PLUS-NEXT: ; return to shader part epilog 1264main_body: 1265 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1266 ret <4 x float> %v 1267} 1268 1269define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { 1270; VERDE-LABEL: sample_c_d_2d: 1271; VERDE: ; %bb.0: ; %main_body 1272; VERDE-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1273; VERDE-NEXT: s_waitcnt vmcnt(0) 1274; VERDE-NEXT: ; return to shader part epilog 1275; 1276; GFX6789-LABEL: sample_c_d_2d: 1277; GFX6789: ; %bb.0: ; %main_body 1278; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1279; GFX6789-NEXT: s_waitcnt vmcnt(0) 1280; GFX6789-NEXT: ; return to shader part epilog 1281; 1282; GFX10PLUS-LABEL: sample_c_d_2d: 1283; GFX10PLUS: ; %bb.0: ; %main_body 1284; GFX10PLUS-NEXT: 
image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1285; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1286; GFX10PLUS-NEXT: ; return to shader part epilog 1287main_body: 1288 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1289 ret <4 x float> %v 1290} 1291 1292define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { 1293; VERDE-LABEL: sample_d_cl_1d: 1294; VERDE: ; %bb.0: ; %main_body 1295; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1296; VERDE-NEXT: s_waitcnt vmcnt(0) 1297; VERDE-NEXT: ; return to shader part epilog 1298; 1299; GFX6789-LABEL: sample_d_cl_1d: 1300; GFX6789: ; %bb.0: ; %main_body 1301; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1302; GFX6789-NEXT: s_waitcnt vmcnt(0) 1303; GFX6789-NEXT: ; return to shader part epilog 1304; 1305; GFX10PLUS-LABEL: sample_d_cl_1d: 1306; GFX10PLUS: ; %bb.0: ; %main_body 1307; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1308; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1309; GFX10PLUS-NEXT: ; return to shader part epilog 1310main_body: 1311 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1312 ret <4 x float> %v 1313} 1314 1315define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { 1316; VERDE-LABEL: sample_d_cl_2d: 1317; VERDE: ; %bb.0: ; %main_body 1318; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1319; VERDE-NEXT: s_waitcnt vmcnt(0) 1320; VERDE-NEXT: ; return to shader 
part epilog 1321; 1322; GFX6789-LABEL: sample_d_cl_2d: 1323; GFX6789: ; %bb.0: ; %main_body 1324; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1325; GFX6789-NEXT: s_waitcnt vmcnt(0) 1326; GFX6789-NEXT: ; return to shader part epilog 1327; 1328; GFX10PLUS-LABEL: sample_d_cl_2d: 1329; GFX10PLUS: ; %bb.0: ; %main_body 1330; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1331; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1332; GFX10PLUS-NEXT: ; return to shader part epilog 1333main_body: 1334 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1335 ret <4 x float> %v 1336} 1337 1338define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { 1339; VERDE-LABEL: sample_c_d_cl_1d: 1340; VERDE: ; %bb.0: ; %main_body 1341; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1342; VERDE-NEXT: s_waitcnt vmcnt(0) 1343; VERDE-NEXT: ; return to shader part epilog 1344; 1345; GFX6789-LABEL: sample_c_d_cl_1d: 1346; GFX6789: ; %bb.0: ; %main_body 1347; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1348; GFX6789-NEXT: s_waitcnt vmcnt(0) 1349; GFX6789-NEXT: ; return to shader part epilog 1350; 1351; GFX10PLUS-LABEL: sample_c_d_cl_1d: 1352; GFX10PLUS: ; %bb.0: ; %main_body 1353; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1354; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1355; GFX10PLUS-NEXT: ; return to shader part epilog 1356main_body: 1357 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1358 ret <4 x float> %v 
1359} 1360 1361define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { 1362; VERDE-LABEL: sample_c_d_cl_2d: 1363; VERDE: ; %bb.0: ; %main_body 1364; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf 1365; VERDE-NEXT: s_waitcnt vmcnt(0) 1366; VERDE-NEXT: ; return to shader part epilog 1367; 1368; GFX6789-LABEL: sample_c_d_cl_2d: 1369; GFX6789: ; %bb.0: ; %main_body 1370; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf 1371; GFX6789-NEXT: s_waitcnt vmcnt(0) 1372; GFX6789-NEXT: ; return to shader part epilog 1373; 1374; GFX10PLUS-LABEL: sample_c_d_cl_2d: 1375; GFX10PLUS: ; %bb.0: ; %main_body 1376; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1377; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1378; GFX10PLUS-NEXT: ; return to shader part epilog 1379main_body: 1380 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1381 ret <4 x float> %v 1382} 1383 1384define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { 1385; VERDE-LABEL: sample_l_1d: 1386; VERDE: ; %bb.0: ; %main_body 1387; VERDE-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1388; VERDE-NEXT: s_waitcnt vmcnt(0) 1389; VERDE-NEXT: ; return to shader part epilog 1390; 1391; GFX6789-LABEL: sample_l_1d: 1392; GFX6789: ; %bb.0: ; %main_body 1393; GFX6789-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1394; GFX6789-NEXT: s_waitcnt vmcnt(0) 1395; GFX6789-NEXT: ; return to shader part epilog 1396; 1397; GFX10PLUS-LABEL: sample_l_1d: 1398; GFX10PLUS: ; %bb.0: ; %main_body 1399; GFX10PLUS-NEXT: image_sample_l v[0:3], 
v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1400; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1401; GFX10PLUS-NEXT: ; return to shader part epilog 1402main_body: 1403 %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1404 ret <4 x float> %v 1405} 1406 1407define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { 1408; VERDE-LABEL: sample_l_2d: 1409; VERDE: ; %bb.0: ; %main_body 1410; VERDE-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1411; VERDE-NEXT: s_waitcnt vmcnt(0) 1412; VERDE-NEXT: ; return to shader part epilog 1413; 1414; GFX6789-LABEL: sample_l_2d: 1415; GFX6789: ; %bb.0: ; %main_body 1416; GFX6789-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1417; GFX6789-NEXT: s_waitcnt vmcnt(0) 1418; GFX6789-NEXT: ; return to shader part epilog 1419; 1420; GFX10PLUS-LABEL: sample_l_2d: 1421; GFX10PLUS: ; %bb.0: ; %main_body 1422; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1423; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1424; GFX10PLUS-NEXT: ; return to shader part epilog 1425main_body: 1426 %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1427 ret <4 x float> %v 1428} 1429 1430define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { 1431; VERDE-LABEL: sample_c_l_1d: 1432; VERDE: ; %bb.0: ; %main_body 1433; VERDE-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1434; VERDE-NEXT: s_waitcnt vmcnt(0) 1435; VERDE-NEXT: ; return to shader part epilog 1436; 1437; GFX6789-LABEL: sample_c_l_1d: 1438; GFX6789: ; %bb.0: ; %main_body 1439; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1440; GFX6789-NEXT: s_waitcnt vmcnt(0) 
1441; GFX6789-NEXT: ; return to shader part epilog 1442; 1443; GFX10PLUS-LABEL: sample_c_l_1d: 1444; GFX10PLUS: ; %bb.0: ; %main_body 1445; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1446; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1447; GFX10PLUS-NEXT: ; return to shader part epilog 1448main_body: 1449 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1450 ret <4 x float> %v 1451} 1452 1453define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { 1454; VERDE-LABEL: sample_c_l_2d: 1455; VERDE: ; %bb.0: ; %main_body 1456; VERDE-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1457; VERDE-NEXT: s_waitcnt vmcnt(0) 1458; VERDE-NEXT: ; return to shader part epilog 1459; 1460; GFX6789-LABEL: sample_c_l_2d: 1461; GFX6789: ; %bb.0: ; %main_body 1462; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1463; GFX6789-NEXT: s_waitcnt vmcnt(0) 1464; GFX6789-NEXT: ; return to shader part epilog 1465; 1466; GFX10PLUS-LABEL: sample_c_l_2d: 1467; GFX10PLUS: ; %bb.0: ; %main_body 1468; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1469; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1470; GFX10PLUS-NEXT: ; return to shader part epilog 1471main_body: 1472 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1473 ret <4 x float> %v 1474} 1475 1476define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1477; VERDE-LABEL: sample_lz_1d: 1478; VERDE: ; %bb.0: ; %main_body 1479; VERDE-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf 1480; VERDE-NEXT: s_waitcnt vmcnt(0) 1481; VERDE-NEXT: ; return to 
shader part epilog 1482; 1483; GFX6789-LABEL: sample_lz_1d: 1484; GFX6789: ; %bb.0: ; %main_body 1485; GFX6789-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf 1486; GFX6789-NEXT: s_waitcnt vmcnt(0) 1487; GFX6789-NEXT: ; return to shader part epilog 1488; 1489; GFX10PLUS-LABEL: sample_lz_1d: 1490; GFX10PLUS: ; %bb.0: ; %main_body 1491; GFX10PLUS-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1492; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1493; GFX10PLUS-NEXT: ; return to shader part epilog 1494main_body: 1495 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1496 ret <4 x float> %v 1497} 1498 1499define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 1500; VERDE-LABEL: sample_lz_2d: 1501; VERDE: ; %bb.0: ; %main_body 1502; VERDE-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1503; VERDE-NEXT: s_waitcnt vmcnt(0) 1504; VERDE-NEXT: ; return to shader part epilog 1505; 1506; GFX6789-LABEL: sample_lz_2d: 1507; GFX6789: ; %bb.0: ; %main_body 1508; GFX6789-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1509; GFX6789-NEXT: s_waitcnt vmcnt(0) 1510; GFX6789-NEXT: ; return to shader part epilog 1511; 1512; GFX10PLUS-LABEL: sample_lz_2d: 1513; GFX10PLUS: ; %bb.0: ; %main_body 1514; GFX10PLUS-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1515; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1516; GFX10PLUS-NEXT: ; return to shader part epilog 1517main_body: 1518 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1519 ret <4 x float> %v 1520} 1521 1522define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { 1523; VERDE-LABEL: sample_c_lz_1d: 1524; VERDE: ; %bb.0: ; %main_body 1525; 
VERDE-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1526; VERDE-NEXT: s_waitcnt vmcnt(0) 1527; VERDE-NEXT: ; return to shader part epilog 1528; 1529; GFX6789-LABEL: sample_c_lz_1d: 1530; GFX6789: ; %bb.0: ; %main_body 1531; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1532; GFX6789-NEXT: s_waitcnt vmcnt(0) 1533; GFX6789-NEXT: ; return to shader part epilog 1534; 1535; GFX10PLUS-LABEL: sample_c_lz_1d: 1536; GFX10PLUS: ; %bb.0: ; %main_body 1537; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1538; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1539; GFX10PLUS-NEXT: ; return to shader part epilog 1540main_body: 1541 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1542 ret <4 x float> %v 1543} 1544 1545define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { 1546; VERDE-LABEL: sample_c_lz_2d: 1547; VERDE: ; %bb.0: ; %main_body 1548; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1549; VERDE-NEXT: s_waitcnt vmcnt(0) 1550; VERDE-NEXT: ; return to shader part epilog 1551; 1552; GFX6789-LABEL: sample_c_lz_2d: 1553; GFX6789: ; %bb.0: ; %main_body 1554; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1555; GFX6789-NEXT: s_waitcnt vmcnt(0) 1556; GFX6789-NEXT: ; return to shader part epilog 1557; 1558; GFX10PLUS-LABEL: sample_c_lz_2d: 1559; GFX10PLUS: ; %bb.0: ; %main_body 1560; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1561; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1562; GFX10PLUS-NEXT: ; return to shader part epilog 1563main_body: 1564 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1565 ret <4 x float> 
%v 1566} 1567 1568define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { 1569; VERDE-LABEL: sample_c_d_o_2darray_V1: 1570; VERDE: ; %bb.0: ; %main_body 1571; VERDE-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da 1572; VERDE-NEXT: s_waitcnt vmcnt(0) 1573; VERDE-NEXT: ; return to shader part epilog 1574; 1575; GFX6789-LABEL: sample_c_d_o_2darray_V1: 1576; GFX6789: ; %bb.0: ; %main_body 1577; GFX6789-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da 1578; GFX6789-NEXT: s_waitcnt vmcnt(0) 1579; GFX6789-NEXT: ; return to shader part epilog 1580; 1581; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1: 1582; GFX10PLUS: ; %bb.0: ; %main_body 1583; GFX10PLUS-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY 1584; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1585; GFX10PLUS-NEXT: ; return to shader part epilog 1586main_body: 1587 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1588 ret float %v 1589} 1590 1591define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) { 1592; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe: 1593; VERDE: ; %bb.0: ; %main_body 1594; VERDE-NEXT: v_mov_b32_e32 v9, 0 1595; VERDE-NEXT: v_mov_b32_e32 v10, v9 1596; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da 1597; VERDE-NEXT: s_mov_b32 s15, 0xf000 1598; VERDE-NEXT: s_mov_b32 s14, -1 1599; VERDE-NEXT: s_waitcnt vmcnt(0) 1600; VERDE-NEXT: v_mov_b32_e32 v0, v9 1601; VERDE-NEXT: 
buffer_store_dword v10, off, s[12:15], 0 1602; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1603; VERDE-NEXT: ; return to shader part epilog 1604; 1605; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe: 1606; GFX6789: ; %bb.0: ; %main_body 1607; GFX6789-NEXT: v_mov_b32_e32 v11, 0 1608; GFX6789-NEXT: v_mov_b32_e32 v12, v11 1609; GFX6789-NEXT: v_mov_b32_e32 v9, v11 1610; GFX6789-NEXT: v_mov_b32_e32 v10, v12 1611; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da 1612; GFX6789-NEXT: s_waitcnt vmcnt(0) 1613; GFX6789-NEXT: v_mov_b32_e32 v0, v9 1614; GFX6789-NEXT: global_store_dword v11, v10, s[12:13] 1615; GFX6789-NEXT: s_waitcnt vmcnt(0) 1616; GFX6789-NEXT: ; return to shader part epilog 1617; 1618; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe: 1619; GFX10: ; %bb.0: ; %main_body 1620; GFX10-NEXT: v_mov_b32_e32 v11, 0 1621; GFX10-NEXT: v_mov_b32_e32 v12, v11 1622; GFX10-NEXT: v_mov_b32_e32 v9, v11 1623; GFX10-NEXT: v_mov_b32_e32 v10, v12 1624; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe 1625; GFX10-NEXT: s_waitcnt vmcnt(0) 1626; GFX10-NEXT: v_mov_b32_e32 v0, v9 1627; GFX10-NEXT: global_store_dword v11, v10, s[12:13] 1628; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1629; GFX10-NEXT: ; return to shader part epilog 1630; 1631; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe: 1632; GFX11: ; %bb.0: ; %main_body 1633; GFX11-NEXT: v_mov_b32_e32 v11, 0 1634; GFX11-NEXT: v_mov_b32_e32 v12, v11 1635; GFX11-NEXT: v_dual_mov_b32 v9, v11 :: v_dual_mov_b32 v10, v12 1636; GFX11-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe 1637; GFX11-NEXT: s_waitcnt vmcnt(0) 1638; GFX11-NEXT: v_mov_b32_e32 v0, v9 1639; GFX11-NEXT: global_store_b32 v11, v10, s[12:13] 1640; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1641; GFX11-NEXT: ; return to shader part epilog 1642main_body: 1643 %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float 
%zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 1644 %v.vec = extractvalue {float, i32} %v, 0 1645 %v.err = extractvalue {float, i32} %v, 1 1646 store i32 %v.err, i32 addrspace(1)* %out, align 4 1647 ret float %v.vec 1648} 1649 1650define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { 1651; VERDE-LABEL: sample_c_d_o_2darray_V2: 1652; VERDE: ; %bb.0: ; %main_body 1653; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da 1654; VERDE-NEXT: s_waitcnt vmcnt(0) 1655; VERDE-NEXT: ; return to shader part epilog 1656; 1657; GFX6789-LABEL: sample_c_d_o_2darray_V2: 1658; GFX6789: ; %bb.0: ; %main_body 1659; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da 1660; GFX6789-NEXT: s_waitcnt vmcnt(0) 1661; GFX6789-NEXT: ; return to shader part epilog 1662; 1663; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2: 1664; GFX10PLUS: ; %bb.0: ; %main_body 1665; GFX10PLUS-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY 1666; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1667; GFX10PLUS-NEXT: ; return to shader part epilog 1668main_body: 1669 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1670 ret <2 x float> %v 1671} 1672 1673define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { 1674; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe: 1675; VERDE: ; %bb.0: ; %main_body 1676; VERDE-NEXT: 
v_mov_b32_e32 v9, 0 1677; VERDE-NEXT: v_mov_b32_e32 v10, v9 1678; VERDE-NEXT: v_mov_b32_e32 v11, v9 1679; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da 1680; VERDE-NEXT: s_waitcnt vmcnt(0) 1681; VERDE-NEXT: v_mov_b32_e32 v0, v9 1682; VERDE-NEXT: v_mov_b32_e32 v1, v10 1683; VERDE-NEXT: v_mov_b32_e32 v2, v11 1684; VERDE-NEXT: ; return to shader part epilog 1685; 1686; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe: 1687; GFX6789: ; %bb.0: ; %main_body 1688; GFX6789-NEXT: v_mov_b32_e32 v9, 0 1689; GFX6789-NEXT: v_mov_b32_e32 v10, v9 1690; GFX6789-NEXT: v_mov_b32_e32 v11, v9 1691; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da 1692; GFX6789-NEXT: s_waitcnt vmcnt(0) 1693; GFX6789-NEXT: v_mov_b32_e32 v0, v9 1694; GFX6789-NEXT: v_mov_b32_e32 v1, v10 1695; GFX6789-NEXT: v_mov_b32_e32 v2, v11 1696; GFX6789-NEXT: ; return to shader part epilog 1697; 1698; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe: 1699; GFX10: ; %bb.0: ; %main_body 1700; GFX10-NEXT: v_mov_b32_e32 v9, 0 1701; GFX10-NEXT: v_mov_b32_e32 v10, v9 1702; GFX10-NEXT: v_mov_b32_e32 v11, v9 1703; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe 1704; GFX10-NEXT: s_waitcnt vmcnt(0) 1705; GFX10-NEXT: v_mov_b32_e32 v0, v9 1706; GFX10-NEXT: v_mov_b32_e32 v1, v10 1707; GFX10-NEXT: v_mov_b32_e32 v2, v11 1708; GFX10-NEXT: ; return to shader part epilog 1709; 1710; GFX11-LABEL: sample_c_d_o_2darray_V2_tfe: 1711; GFX11: ; %bb.0: ; %main_body 1712; GFX11-NEXT: v_mov_b32_e32 v9, 0 1713; GFX11-NEXT: v_mov_b32_e32 v10, v9 1714; GFX11-NEXT: v_mov_b32_e32 v11, v9 1715; GFX11-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe 1716; GFX11-NEXT: s_waitcnt vmcnt(0) 1717; GFX11-NEXT: v_mov_b32_e32 v2, v11 1718; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10 1719; GFX11-NEXT: ; return to shader part epilog 1720main_body: 1721 %v = call {<2 x float>, i32} 
@llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 1722 %v.vec = extractvalue {<2 x float>, i32} %v, 0 1723 %v.f1 = extractelement <2 x float> %v.vec, i32 0 1724 %v.f2 = extractelement <2 x float> %v.vec, i32 1 1725 %v.err = extractvalue {<2 x float>, i32} %v, 1 1726 %v.errf = bitcast i32 %v.err to float 1727 %res.0 = insertelement <4 x float> undef, float %v.f1, i32 0 1728 %res.1 = insertelement <4 x float> %res.0, float %v.f2, i32 1 1729 %res.2 = insertelement <4 x float> %res.1, float %v.errf, i32 2 1730 ret <4 x float> %res.2 1731} 1732 1733define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1734; VERDE-LABEL: sample_1d_unorm: 1735; VERDE: ; %bb.0: ; %main_body 1736; VERDE-NEXT: s_mov_b64 s[12:13], exec 1737; VERDE-NEXT: s_wqm_b64 exec, exec 1738; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1739; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm 1740; VERDE-NEXT: s_waitcnt vmcnt(0) 1741; VERDE-NEXT: ; return to shader part epilog 1742; 1743; GFX6789-LABEL: sample_1d_unorm: 1744; GFX6789: ; %bb.0: ; %main_body 1745; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1746; GFX6789-NEXT: s_wqm_b64 exec, exec 1747; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1748; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm 1749; GFX6789-NEXT: s_waitcnt vmcnt(0) 1750; GFX6789-NEXT: ; return to shader part epilog 1751; 1752; GFX10PLUS-LABEL: sample_1d_unorm: 1753; GFX10PLUS: ; %bb.0: ; %main_body 1754; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1755; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1756; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1757; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm 1758; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1759; GFX10PLUS-NEXT: ; return to shader 
part epilog 1760main_body: 1761 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0) 1762 ret <4 x float> %v 1763} 1764 1765define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1766; VERDE-LABEL: sample_1d_glc: 1767; VERDE: ; %bb.0: ; %main_body 1768; VERDE-NEXT: s_mov_b64 s[12:13], exec 1769; VERDE-NEXT: s_wqm_b64 exec, exec 1770; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1771; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc 1772; VERDE-NEXT: s_waitcnt vmcnt(0) 1773; VERDE-NEXT: ; return to shader part epilog 1774; 1775; GFX6789-LABEL: sample_1d_glc: 1776; GFX6789: ; %bb.0: ; %main_body 1777; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1778; GFX6789-NEXT: s_wqm_b64 exec, exec 1779; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1780; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc 1781; GFX6789-NEXT: s_waitcnt vmcnt(0) 1782; GFX6789-NEXT: ; return to shader part epilog 1783; 1784; GFX10PLUS-LABEL: sample_1d_glc: 1785; GFX10PLUS: ; %bb.0: ; %main_body 1786; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1787; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1788; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1789; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc 1790; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1791; GFX10PLUS-NEXT: ; return to shader part epilog 1792main_body: 1793 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1) 1794 ret <4 x float> %v 1795} 1796 1797define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1798; VERDE-LABEL: sample_1d_slc: 1799; VERDE: ; %bb.0: ; %main_body 1800; VERDE-NEXT: s_mov_b64 s[12:13], exec 1801; VERDE-NEXT: s_wqm_b64 exec, exec 1802; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1803; VERDE-NEXT: image_sample v[0:3], 
v0, s[0:7], s[8:11] dmask:0xf slc 1804; VERDE-NEXT: s_waitcnt vmcnt(0) 1805; VERDE-NEXT: ; return to shader part epilog 1806; 1807; GFX6789-LABEL: sample_1d_slc: 1808; GFX6789: ; %bb.0: ; %main_body 1809; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1810; GFX6789-NEXT: s_wqm_b64 exec, exec 1811; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1812; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc 1813; GFX6789-NEXT: s_waitcnt vmcnt(0) 1814; GFX6789-NEXT: ; return to shader part epilog 1815; 1816; GFX10PLUS-LABEL: sample_1d_slc: 1817; GFX10PLUS: ; %bb.0: ; %main_body 1818; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1819; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1820; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1821; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc 1822; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1823; GFX10PLUS-NEXT: ; return to shader part epilog 1824main_body: 1825 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2) 1826 ret <4 x float> %v 1827} 1828 1829define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1830; VERDE-LABEL: sample_1d_glc_slc: 1831; VERDE: ; %bb.0: ; %main_body 1832; VERDE-NEXT: s_mov_b64 s[12:13], exec 1833; VERDE-NEXT: s_wqm_b64 exec, exec 1834; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1835; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc 1836; VERDE-NEXT: s_waitcnt vmcnt(0) 1837; VERDE-NEXT: ; return to shader part epilog 1838; 1839; GFX6789-LABEL: sample_1d_glc_slc: 1840; GFX6789: ; %bb.0: ; %main_body 1841; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1842; GFX6789-NEXT: s_wqm_b64 exec, exec 1843; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1844; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc 1845; GFX6789-NEXT: s_waitcnt vmcnt(0) 1846; GFX6789-NEXT: ; return to shader part epilog 1847; 1848; 
GFX10PLUS-LABEL: sample_1d_glc_slc: 1849; GFX10PLUS: ; %bb.0: ; %main_body 1850; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1851; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1852; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1853; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc 1854; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1855; GFX10PLUS-NEXT: ; return to shader part epilog 1856main_body: 1857 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3) 1858 ret <4 x float> %v 1859} 1860 1861define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1862; VERDE-LABEL: adjust_writemask_sample_0: 1863; VERDE: ; %bb.0: ; %main_body 1864; VERDE-NEXT: s_mov_b64 s[12:13], exec 1865; VERDE-NEXT: s_wqm_b64 exec, exec 1866; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1867; VERDE-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 1868; VERDE-NEXT: s_waitcnt vmcnt(0) 1869; VERDE-NEXT: ; return to shader part epilog 1870; 1871; GFX6789-LABEL: adjust_writemask_sample_0: 1872; GFX6789: ; %bb.0: ; %main_body 1873; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1874; GFX6789-NEXT: s_wqm_b64 exec, exec 1875; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1876; GFX6789-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 1877; GFX6789-NEXT: s_waitcnt vmcnt(0) 1878; GFX6789-NEXT: ; return to shader part epilog 1879; 1880; GFX10PLUS-LABEL: adjust_writemask_sample_0: 1881; GFX10PLUS: ; %bb.0: ; %main_body 1882; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1883; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1884; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1885; GFX10PLUS-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D 1886; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1887; GFX10PLUS-NEXT: ; return to shader part epilog 1888main_body: 1889 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> 
%rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1890 %elt0 = extractelement <4 x float> %r, i32 0 1891 ret float %elt0 1892} 1893 1894define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1895; VERDE-LABEL: adjust_writemask_sample_01: 1896; VERDE: ; %bb.0: ; %main_body 1897; VERDE-NEXT: s_mov_b64 s[12:13], exec 1898; VERDE-NEXT: s_wqm_b64 exec, exec 1899; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1900; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 1901; VERDE-NEXT: s_waitcnt vmcnt(0) 1902; VERDE-NEXT: ; return to shader part epilog 1903; 1904; GFX6789-LABEL: adjust_writemask_sample_01: 1905; GFX6789: ; %bb.0: ; %main_body 1906; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1907; GFX6789-NEXT: s_wqm_b64 exec, exec 1908; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1909; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 1910; GFX6789-NEXT: s_waitcnt vmcnt(0) 1911; GFX6789-NEXT: ; return to shader part epilog 1912; 1913; GFX10PLUS-LABEL: adjust_writemask_sample_01: 1914; GFX10PLUS: ; %bb.0: ; %main_body 1915; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1916; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1917; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1918; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D 1919; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1920; GFX10PLUS-NEXT: ; return to shader part epilog 1921main_body: 1922 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1923 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1> 1924 ret <2 x float> %out 1925} 1926 1927define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1928; VERDE-LABEL: adjust_writemask_sample_012: 1929; VERDE: ; %bb.0: ; %main_body 1930; VERDE-NEXT: s_mov_b64 s[12:13], exec 1931; VERDE-NEXT: s_wqm_b64 exec, exec 1932; 
VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1933; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 1934; VERDE-NEXT: s_waitcnt vmcnt(0) 1935; VERDE-NEXT: ; return to shader part epilog 1936; 1937; GFX6789-LABEL: adjust_writemask_sample_012: 1938; GFX6789: ; %bb.0: ; %main_body 1939; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1940; GFX6789-NEXT: s_wqm_b64 exec, exec 1941; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1942; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 1943; GFX6789-NEXT: s_waitcnt vmcnt(0) 1944; GFX6789-NEXT: ; return to shader part epilog 1945; 1946; GFX10PLUS-LABEL: adjust_writemask_sample_012: 1947; GFX10PLUS: ; %bb.0: ; %main_body 1948; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1949; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1950; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1951; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D 1952; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1953; GFX10PLUS-NEXT: ; return to shader part epilog 1954main_body: 1955 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1956 %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 1957 ret <3 x float> %out 1958} 1959 1960define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1961; VERDE-LABEL: adjust_writemask_sample_12: 1962; VERDE: ; %bb.0: ; %main_body 1963; VERDE-NEXT: s_mov_b64 s[12:13], exec 1964; VERDE-NEXT: s_wqm_b64 exec, exec 1965; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1966; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 1967; VERDE-NEXT: s_waitcnt vmcnt(0) 1968; VERDE-NEXT: ; return to shader part epilog 1969; 1970; GFX6789-LABEL: adjust_writemask_sample_12: 1971; GFX6789: ; %bb.0: ; %main_body 1972; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1973; GFX6789-NEXT: s_wqm_b64 exec, exec 1974; GFX6789-NEXT: s_and_b64 
exec, exec, s[12:13] 1975; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 1976; GFX6789-NEXT: s_waitcnt vmcnt(0) 1977; GFX6789-NEXT: ; return to shader part epilog 1978; 1979; GFX10PLUS-LABEL: adjust_writemask_sample_12: 1980; GFX10PLUS: ; %bb.0: ; %main_body 1981; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1982; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1983; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1984; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D 1985; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1986; GFX10PLUS-NEXT: ; return to shader part epilog 1987main_body: 1988 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1989 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2> 1990 ret <2 x float> %out 1991} 1992 1993define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1994; VERDE-LABEL: adjust_writemask_sample_03: 1995; VERDE: ; %bb.0: ; %main_body 1996; VERDE-NEXT: s_mov_b64 s[12:13], exec 1997; VERDE-NEXT: s_wqm_b64 exec, exec 1998; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1999; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 2000; VERDE-NEXT: s_waitcnt vmcnt(0) 2001; VERDE-NEXT: ; return to shader part epilog 2002; 2003; GFX6789-LABEL: adjust_writemask_sample_03: 2004; GFX6789: ; %bb.0: ; %main_body 2005; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2006; GFX6789-NEXT: s_wqm_b64 exec, exec 2007; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2008; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 2009; GFX6789-NEXT: s_waitcnt vmcnt(0) 2010; GFX6789-NEXT: ; return to shader part epilog 2011; 2012; GFX10PLUS-LABEL: adjust_writemask_sample_03: 2013; GFX10PLUS: ; %bb.0: ; %main_body 2014; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2015; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2016; GFX10PLUS-NEXT: s_and_b32 exec_lo, 
exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
  ret <2 x float> %out
}

; Writemask adjustment, elements 1 and 3.
; The call passes i32 15 as its first operand, but only result elements 1 and 3
; are returned. The assertions below expect image_sample with dmask:0xa and a
; two-register destination v[0:1].
define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_13:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_13:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_13:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %texel = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %used = shufflevector <4 x float> %texel, <4 x float> undef, <2 x i32> <i32 1, i32 3>
  ret <2 x float> %used
}

; Writemask adjustment, elements 1, 2 and 3.
; The call passes i32 15 as its first operand and result elements 1..3 are
; returned. The assertions below expect image_sample with dmask:0xe and a
; three-register destination v[0:2].
define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_123:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_123:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_123:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %texel = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %used = shufflevector <4 x float> %texel, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %used
}

; Nothing enabled.
; The call passes i32 0 as its first operand and the whole <4 x float> result is
; returned. The assertions below expect no image_sample instruction at all: the
; function body consists only of the epilog comment.
define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_none_enabled:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_none_enabled:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_none_enabled:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %texel = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %texel
}

; Writemask narrowing from a partial mask.
; The call passes i32 14 as its first operand and result elements 0 and 1 are
; returned. The assertions below expect image_sample with dmask:0x6 and a
; two-register destination v[0:1].
define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_123_to_12:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_123_to_12:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_123_to_12:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %texel = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %used = shufflevector <4 x float> %texel, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %used
}

; Writemask narrowing from a partial mask.
; The call passes i32 11 as its first operand and result elements 1 and 2 are
; returned. The assertions below expect image_sample with dmask:0xa and a
; two-register destination v[0:1].
define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_013_to_13:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_013_to_13:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_013_to_13:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %texel = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %used = shufflevector <4 x float> %texel, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %used
}

; Plain image.sample declarations, one per dimension suffix (1d, 2d, 3d, cube,
; 1darray, 2darray), plus a 1d variant returning a {<4 x float>, i32} aggregate.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.c / sample.cl / sample.c.cl declarations, 1d and 2d.
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.b family (with .c and .cl combinations), 1d and 2d. These names carry
; a second .f32 overload suffix.
declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.d family (with .c and .cl combinations), 1d and 2d. Like the sample.b
; names, these carry a second .f32 overload suffix.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.l and sample.c.l declarations, 1d and 2d.
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.lz and sample.c.lz declarations, 1d and 2d.
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.c.d.o.2darray in four return shapes: float, {float, i32}, <2 x float>
; and {<2 x float>, i32}. All four take two leading i32 operands followed by
; eight float operands.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Attribute groups. #1 is the group attached to every declaration above; #0 and
; #2 are not referenced by the declarations in this part of the file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }