; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Plain 1D sample with all four channels requested (dmask operand 15).
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Same sample with the second-to-last intrinsic operand set to 1. The expected
; code carries the tfe modifier, zero-initialises v[0:4] before the sample, and
; stores the fifth result dword (v4) to %out.
define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[14:15], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v5, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v3, v0
; VERDE-NEXT:    v_mov_b32_e32 v4, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[14:15]
; VERDE-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; VERDE-NEXT:    s_mov_b32 s15, 0xf000
; VERDE-NEXT:    s_mov_b32 s14, -1
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    buffer_store_dword v4, off, s[12:15], 0
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[14:15], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v6, 0
; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
; GFX6789-NEXT:    v_mov_b32_e32 v7, v6
; GFX6789-NEXT:    v_mov_b32_e32 v8, v6
; GFX6789-NEXT:    v_mov_b32_e32 v9, v6
; GFX6789-NEXT:    v_mov_b32_e32 v10, v6
; GFX6789-NEXT:    v_mov_b32_e32 v0, v6
; GFX6789-NEXT:    v_mov_b32_e32 v1, v7
; GFX6789-NEXT:    v_mov_b32_e32 v2, v8
; GFX6789-NEXT:    v_mov_b32_e32 v3, v9
; GFX6789-NEXT:    v_mov_b32_e32 v4, v10
; GFX6789-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6789-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    global_store_dword v6, v4, s[12:13]
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_mov_b32_e32 v6, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v7, v6
; GFX10-NEXT:    v_mov_b32_e32 v8, v6
; GFX10-NEXT:    v_mov_b32_e32 v9, v6
; GFX10-NEXT:    v_mov_b32_e32 v10, v6
; GFX10-NEXT:    v_mov_b32_e32 v0, v6
; GFX10-NEXT:    v_mov_b32_e32 v1, v7
; GFX10-NEXT:    v_mov_b32_e32 v2, v8
; GFX10-NEXT:    v_mov_b32_e32 v3, v9
; GFX10-NEXT:    v_mov_b32_e32 v4, v10
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v6, v4, s[12:13]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_tfe:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s14, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    v_mov_b32_e32 v6, 0
; GFX11-NEXT:    v_mov_b32_e32 v5, v0
; GFX11-NEXT:    v_mov_b32_e32 v7, v6
; GFX11-NEXT:    v_mov_b32_e32 v8, v6
; GFX11-NEXT:    v_mov_b32_e32 v9, v6
; GFX11-NEXT:    v_mov_b32_e32 v10, v6
; GFX11-NEXT:    v_mov_b32_e32 v0, v6
; GFX11-NEXT:    v_mov_b32_e32 v1, v7
; GFX11-NEXT:    v_mov_b32_e32 v2, v8
; GFX11-NEXT:    v_mov_b32_e32 v3, v9
; GFX11-NEXT:    v_mov_b32_e32 v4, v10
; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX11-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v6, v4, s[12:13]
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; Only element 0 of the sampled vector and the status word are returned, so the
; expected dmask narrows from the requested 15 to 0x1. %out is declared but not
; used by the body.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 0
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}

; Only element 1 of the sampled vector and the status word are returned; the
; expected dmask narrows to 0x2.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 1
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}

; Only element 2 of the sampled vector and the status word are returned; the
; expected dmask narrows to 0x4.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 2
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}

; Only element 3 of the sampled vector and the status word are returned; the
; expected dmask narrows to 0x8.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}

; Elements 0 and 1 plus the status word are returned; the expected dmask
; narrows to 0x3 and the result occupies v[0:2].
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v3, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 0
  %res.f2 = extractelement <4 x float> %res.vec, i32 1
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
  ret <4 x float> %res
}

; Elements 1 and 3 plus the status word are returned; the expected dmask
; narrows to 0xa and the result occupies v[0:2].
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v3, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 1
  %res.f2 = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
  ret <4 x float> %res
}

; Elements 0, 2 and 3 plus the status word are returned; the expected dmask
; narrows to 0xd and the result occupies v[0:3].
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v4, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v3, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v4, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 0
  %res.f2 = extractelement <4 x float> %res.vec, i32 2
  %res.f3 = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res.tmp3 = insertelement <4 x float> %res.tmp2, float %res.f3, i32 2
  %res = insertelement <4 x float> %res.tmp3, float %res.errf, i32 3
  ret <4 x float> %res
}

; Same shape as the tfe store test, but the second-to-last intrinsic operand is
; 2; the expected instruction carries the lwe modifier and the fifth result
; dword (v4) is stored to %out.
define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
; VERDE-LABEL: sample_1d_lwe:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[14:15], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    v_mov_b32_e32 v5, v0
; VERDE-NEXT:    v_mov_b32_e32 v0, 0
; VERDE-NEXT:    v_mov_b32_e32 v1, v0
; VERDE-NEXT:    v_mov_b32_e32 v2, v0
; VERDE-NEXT:    v_mov_b32_e32 v3, v0
; VERDE-NEXT:    v_mov_b32_e32 v4, v0
; VERDE-NEXT:    s_and_b64 exec, exec, s[14:15]
; VERDE-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
; VERDE-NEXT:    s_mov_b32 s15, 0xf000
; VERDE-NEXT:    s_mov_b32 s14, -1
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    buffer_store_dword v4, off, s[12:15], 0
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_lwe:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[14:15], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    v_mov_b32_e32 v6, 0
; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
; GFX6789-NEXT:    v_mov_b32_e32 v7, v6
; GFX6789-NEXT:    v_mov_b32_e32 v8, v6
; GFX6789-NEXT:    v_mov_b32_e32 v9, v6
; GFX6789-NEXT:    v_mov_b32_e32 v10, v6
; GFX6789-NEXT:    v_mov_b32_e32 v0, v6
; GFX6789-NEXT:    v_mov_b32_e32 v1, v7
; GFX6789-NEXT:    v_mov_b32_e32 v2, v8
; GFX6789-NEXT:    v_mov_b32_e32 v3, v9
; GFX6789-NEXT:    v_mov_b32_e32 v4, v10
; GFX6789-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6789-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    global_store_dword v6, v4, s[12:13]
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_lwe:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_mov_b32_e32 v6, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v7, v6
; GFX10-NEXT:    v_mov_b32_e32 v8, v6
; GFX10-NEXT:    v_mov_b32_e32 v9, v6
; GFX10-NEXT:    v_mov_b32_e32 v10, v6
; GFX10-NEXT:    v_mov_b32_e32 v0, v6
; GFX10-NEXT:    v_mov_b32_e32 v1, v7
; GFX10-NEXT:    v_mov_b32_e32 v2, v8
; GFX10-NEXT:    v_mov_b32_e32 v3, v9
; GFX10-NEXT:    v_mov_b32_e32 v4, v10
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v6, v4, s[12:13]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_lwe:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s14, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    v_mov_b32_e32 v6, 0
; GFX11-NEXT:    v_mov_b32_e32 v5, v0
; GFX11-NEXT:    v_mov_b32_e32 v7, v6
; GFX11-NEXT:    v_mov_b32_e32 v8, v6
; GFX11-NEXT:    v_mov_b32_e32 v9, v6
; GFX11-NEXT:    v_mov_b32_e32 v10, v6
; GFX11-NEXT:    v_mov_b32_e32 v0, v6
; GFX11-NEXT:    v_mov_b32_e32 v1, v7
; GFX11-NEXT:    v_mov_b32_e32 v2, v8
; GFX11-NEXT:    v_mov_b32_e32 v3, v9
; GFX11-NEXT:    v_mov_b32_e32 v4, v10
; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX11-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v6, v4, s[12:13]
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; 2D sample: two coordinates are passed in v[0:1].
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; VERDE-LABEL: sample_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 3D sample: three coordinates are passed in v[0:2].
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
; VERDE-LABEL: sample_3d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_3d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_3d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Cube sample: the expected code uses the da modifier before gfx10 and
; dim:SQ_RSRC_IMG_CUBE on gfx10 and later.
define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; VERDE-LABEL: sample_cube:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_cube:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_cube:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D array sample: coordinate plus slice in v[0:1]; da before gfx10,
; dim:SQ_RSRC_IMG_1D_ARRAY on gfx10 and later.
define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
; VERDE-LABEL: sample_1darray:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1darray:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1darray:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D array sample: two coordinates plus slice in v[0:2]; da before gfx10,
; dim:SQ_RSRC_IMG_2D_ARRAY on gfx10 and later.
define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_2darray:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_2darray:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_2darray:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; VERDE-LABEL: sample_c_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_c 
v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 741; VERDE-NEXT: s_waitcnt vmcnt(0) 742; VERDE-NEXT: ; return to shader part epilog 743; 744; GFX6789-LABEL: sample_c_1d: 745; GFX6789: ; %bb.0: ; %main_body 746; GFX6789-NEXT: s_mov_b64 s[12:13], exec 747; GFX6789-NEXT: s_wqm_b64 exec, exec 748; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 749; GFX6789-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 750; GFX6789-NEXT: s_waitcnt vmcnt(0) 751; GFX6789-NEXT: ; return to shader part epilog 752; 753; GFX10PLUS-LABEL: sample_c_1d: 754; GFX10PLUS: ; %bb.0: ; %main_body 755; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 756; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 757; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 758; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 759; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 760; GFX10PLUS-NEXT: ; return to shader part epilog 761main_body: 762 %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 763 ret <4 x float> %v 764} 765 766define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { 767; VERDE-LABEL: sample_c_2d: 768; VERDE: ; %bb.0: ; %main_body 769; VERDE-NEXT: s_mov_b64 s[12:13], exec 770; VERDE-NEXT: s_wqm_b64 exec, exec 771; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 772; VERDE-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 773; VERDE-NEXT: s_waitcnt vmcnt(0) 774; VERDE-NEXT: ; return to shader part epilog 775; 776; GFX6789-LABEL: sample_c_2d: 777; GFX6789: ; %bb.0: ; %main_body 778; GFX6789-NEXT: s_mov_b64 s[12:13], exec 779; GFX6789-NEXT: s_wqm_b64 exec, exec 780; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 781; GFX6789-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 782; GFX6789-NEXT: s_waitcnt vmcnt(0) 783; GFX6789-NEXT: ; return to shader part epilog 784; 785; GFX10PLUS-LABEL: 
sample_c_2d: 786; GFX10PLUS: ; %bb.0: ; %main_body 787; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 788; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 789; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 790; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 791; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 792; GFX10PLUS-NEXT: ; return to shader part epilog 793main_body: 794 %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 795 ret <4 x float> %v 796} 797 798define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) { 799; VERDE-LABEL: sample_cl_1d: 800; VERDE: ; %bb.0: ; %main_body 801; VERDE-NEXT: s_mov_b64 s[12:13], exec 802; VERDE-NEXT: s_wqm_b64 exec, exec 803; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 804; VERDE-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 805; VERDE-NEXT: s_waitcnt vmcnt(0) 806; VERDE-NEXT: ; return to shader part epilog 807; 808; GFX6789-LABEL: sample_cl_1d: 809; GFX6789: ; %bb.0: ; %main_body 810; GFX6789-NEXT: s_mov_b64 s[12:13], exec 811; GFX6789-NEXT: s_wqm_b64 exec, exec 812; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 813; GFX6789-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 814; GFX6789-NEXT: s_waitcnt vmcnt(0) 815; GFX6789-NEXT: ; return to shader part epilog 816; 817; GFX10PLUS-LABEL: sample_cl_1d: 818; GFX10PLUS: ; %bb.0: ; %main_body 819; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 820; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 821; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 822; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 823; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 824; GFX10PLUS-NEXT: ; return to shader part epilog 825main_body: 826 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> 
%samp, i1 0, i32 0, i32 0) 827 ret <4 x float> %v 828} 829 830define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { 831; VERDE-LABEL: sample_cl_2d: 832; VERDE: ; %bb.0: ; %main_body 833; VERDE-NEXT: s_mov_b64 s[12:13], exec 834; VERDE-NEXT: s_wqm_b64 exec, exec 835; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 836; VERDE-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 837; VERDE-NEXT: s_waitcnt vmcnt(0) 838; VERDE-NEXT: ; return to shader part epilog 839; 840; GFX6789-LABEL: sample_cl_2d: 841; GFX6789: ; %bb.0: ; %main_body 842; GFX6789-NEXT: s_mov_b64 s[12:13], exec 843; GFX6789-NEXT: s_wqm_b64 exec, exec 844; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 845; GFX6789-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 846; GFX6789-NEXT: s_waitcnt vmcnt(0) 847; GFX6789-NEXT: ; return to shader part epilog 848; 849; GFX10PLUS-LABEL: sample_cl_2d: 850; GFX10PLUS: ; %bb.0: ; %main_body 851; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 852; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 853; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 854; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 855; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 856; GFX10PLUS-NEXT: ; return to shader part epilog 857main_body: 858 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 859 ret <4 x float> %v 860} 861 862define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) { 863; VERDE-LABEL: sample_c_cl_1d: 864; VERDE: ; %bb.0: ; %main_body 865; VERDE-NEXT: s_mov_b64 s[12:13], exec 866; VERDE-NEXT: s_wqm_b64 exec, exec 867; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 868; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 869; VERDE-NEXT: s_waitcnt vmcnt(0) 870; 
VERDE-NEXT: ; return to shader part epilog 871; 872; GFX6789-LABEL: sample_c_cl_1d: 873; GFX6789: ; %bb.0: ; %main_body 874; GFX6789-NEXT: s_mov_b64 s[12:13], exec 875; GFX6789-NEXT: s_wqm_b64 exec, exec 876; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 877; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 878; GFX6789-NEXT: s_waitcnt vmcnt(0) 879; GFX6789-NEXT: ; return to shader part epilog 880; 881; GFX10PLUS-LABEL: sample_c_cl_1d: 882; GFX10PLUS: ; %bb.0: ; %main_body 883; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 884; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 885; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 886; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 887; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 888; GFX10PLUS-NEXT: ; return to shader part epilog 889main_body: 890 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 891 ret <4 x float> %v 892} 893 894define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { 895; VERDE-LABEL: sample_c_cl_2d: 896; VERDE: ; %bb.0: ; %main_body 897; VERDE-NEXT: s_mov_b64 s[12:13], exec 898; VERDE-NEXT: s_wqm_b64 exec, exec 899; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 900; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 901; VERDE-NEXT: s_waitcnt vmcnt(0) 902; VERDE-NEXT: ; return to shader part epilog 903; 904; GFX6789-LABEL: sample_c_cl_2d: 905; GFX6789: ; %bb.0: ; %main_body 906; GFX6789-NEXT: s_mov_b64 s[12:13], exec 907; GFX6789-NEXT: s_wqm_b64 exec, exec 908; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 909; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 910; GFX6789-NEXT: s_waitcnt vmcnt(0) 911; GFX6789-NEXT: ; return to shader part epilog 912; 913; GFX10PLUS-LABEL: sample_c_cl_2d: 914; GFX10PLUS: 
; %bb.0: ; %main_body 915; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 916; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 917; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 918; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 919; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 920; GFX10PLUS-NEXT: ; return to shader part epilog 921main_body: 922 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 923 ret <4 x float> %v 924} 925 926define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { 927; VERDE-LABEL: sample_b_1d: 928; VERDE: ; %bb.0: ; %main_body 929; VERDE-NEXT: s_mov_b64 s[12:13], exec 930; VERDE-NEXT: s_wqm_b64 exec, exec 931; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 932; VERDE-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 933; VERDE-NEXT: s_waitcnt vmcnt(0) 934; VERDE-NEXT: ; return to shader part epilog 935; 936; GFX6789-LABEL: sample_b_1d: 937; GFX6789: ; %bb.0: ; %main_body 938; GFX6789-NEXT: s_mov_b64 s[12:13], exec 939; GFX6789-NEXT: s_wqm_b64 exec, exec 940; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 941; GFX6789-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 942; GFX6789-NEXT: s_waitcnt vmcnt(0) 943; GFX6789-NEXT: ; return to shader part epilog 944; 945; GFX10PLUS-LABEL: sample_b_1d: 946; GFX10PLUS: ; %bb.0: ; %main_body 947; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 948; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 949; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 950; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 951; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 952; GFX10PLUS-NEXT: ; return to shader part epilog 953main_body: 954 %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 
0, i32 0) 955 ret <4 x float> %v 956} 957 958define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { 959; VERDE-LABEL: sample_b_2d: 960; VERDE: ; %bb.0: ; %main_body 961; VERDE-NEXT: s_mov_b64 s[12:13], exec 962; VERDE-NEXT: s_wqm_b64 exec, exec 963; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 964; VERDE-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 965; VERDE-NEXT: s_waitcnt vmcnt(0) 966; VERDE-NEXT: ; return to shader part epilog 967; 968; GFX6789-LABEL: sample_b_2d: 969; GFX6789: ; %bb.0: ; %main_body 970; GFX6789-NEXT: s_mov_b64 s[12:13], exec 971; GFX6789-NEXT: s_wqm_b64 exec, exec 972; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 973; GFX6789-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 974; GFX6789-NEXT: s_waitcnt vmcnt(0) 975; GFX6789-NEXT: ; return to shader part epilog 976; 977; GFX10PLUS-LABEL: sample_b_2d: 978; GFX10PLUS: ; %bb.0: ; %main_body 979; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 980; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 981; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 982; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 983; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 984; GFX10PLUS-NEXT: ; return to shader part epilog 985main_body: 986 %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 987 ret <4 x float> %v 988} 989 990define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) { 991; VERDE-LABEL: sample_c_b_1d: 992; VERDE: ; %bb.0: ; %main_body 993; VERDE-NEXT: s_mov_b64 s[12:13], exec 994; VERDE-NEXT: s_wqm_b64 exec, exec 995; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 996; VERDE-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 997; VERDE-NEXT: s_waitcnt vmcnt(0) 998; VERDE-NEXT: ; return to shader part 
epilog 999; 1000; GFX6789-LABEL: sample_c_b_1d: 1001; GFX6789: ; %bb.0: ; %main_body 1002; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1003; GFX6789-NEXT: s_wqm_b64 exec, exec 1004; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1005; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1006; GFX6789-NEXT: s_waitcnt vmcnt(0) 1007; GFX6789-NEXT: ; return to shader part epilog 1008; 1009; GFX10PLUS-LABEL: sample_c_b_1d: 1010; GFX10PLUS: ; %bb.0: ; %main_body 1011; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1012; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1013; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1014; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1015; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1016; GFX10PLUS-NEXT: ; return to shader part epilog 1017main_body: 1018 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1019 ret <4 x float> %v 1020} 1021 1022define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { 1023; VERDE-LABEL: sample_c_b_2d: 1024; VERDE: ; %bb.0: ; %main_body 1025; VERDE-NEXT: s_mov_b64 s[12:13], exec 1026; VERDE-NEXT: s_wqm_b64 exec, exec 1027; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1028; VERDE-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1029; VERDE-NEXT: s_waitcnt vmcnt(0) 1030; VERDE-NEXT: ; return to shader part epilog 1031; 1032; GFX6789-LABEL: sample_c_b_2d: 1033; GFX6789: ; %bb.0: ; %main_body 1034; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1035; GFX6789-NEXT: s_wqm_b64 exec, exec 1036; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1037; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1038; GFX6789-NEXT: s_waitcnt vmcnt(0) 1039; GFX6789-NEXT: ; return to shader part epilog 1040; 1041; GFX10PLUS-LABEL: sample_c_b_2d: 1042; GFX10PLUS: ; 
%bb.0: ; %main_body 1043; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1044; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1045; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1046; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1047; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1048; GFX10PLUS-NEXT: ; return to shader part epilog 1049main_body: 1050 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1051 ret <4 x float> %v 1052} 1053 1054define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) { 1055; VERDE-LABEL: sample_b_cl_1d: 1056; VERDE: ; %bb.0: ; %main_body 1057; VERDE-NEXT: s_mov_b64 s[12:13], exec 1058; VERDE-NEXT: s_wqm_b64 exec, exec 1059; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1060; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1061; VERDE-NEXT: s_waitcnt vmcnt(0) 1062; VERDE-NEXT: ; return to shader part epilog 1063; 1064; GFX6789-LABEL: sample_b_cl_1d: 1065; GFX6789: ; %bb.0: ; %main_body 1066; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1067; GFX6789-NEXT: s_wqm_b64 exec, exec 1068; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1069; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1070; GFX6789-NEXT: s_waitcnt vmcnt(0) 1071; GFX6789-NEXT: ; return to shader part epilog 1072; 1073; GFX10PLUS-LABEL: sample_b_cl_1d: 1074; GFX10PLUS: ; %bb.0: ; %main_body 1075; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1076; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1077; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1078; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1079; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1080; GFX10PLUS-NEXT: ; return to shader part epilog 1081main_body: 1082 %v = call <4 x float> 
@llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1083 ret <4 x float> %v 1084} 1085 1086define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { 1087; VERDE-LABEL: sample_b_cl_2d: 1088; VERDE: ; %bb.0: ; %main_body 1089; VERDE-NEXT: s_mov_b64 s[12:13], exec 1090; VERDE-NEXT: s_wqm_b64 exec, exec 1091; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1092; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1093; VERDE-NEXT: s_waitcnt vmcnt(0) 1094; VERDE-NEXT: ; return to shader part epilog 1095; 1096; GFX6789-LABEL: sample_b_cl_2d: 1097; GFX6789: ; %bb.0: ; %main_body 1098; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1099; GFX6789-NEXT: s_wqm_b64 exec, exec 1100; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1101; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1102; GFX6789-NEXT: s_waitcnt vmcnt(0) 1103; GFX6789-NEXT: ; return to shader part epilog 1104; 1105; GFX10PLUS-LABEL: sample_b_cl_2d: 1106; GFX10PLUS: ; %bb.0: ; %main_body 1107; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1108; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1109; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1110; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1111; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1112; GFX10PLUS-NEXT: ; return to shader part epilog 1113main_body: 1114 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1115 ret <4 x float> %v 1116} 1117 1118define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) { 1119; VERDE-LABEL: sample_c_b_cl_1d: 1120; VERDE: ; %bb.0: ; %main_body 1121; VERDE-NEXT: s_mov_b64 
s[12:13], exec 1122; VERDE-NEXT: s_wqm_b64 exec, exec 1123; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1124; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1125; VERDE-NEXT: s_waitcnt vmcnt(0) 1126; VERDE-NEXT: ; return to shader part epilog 1127; 1128; GFX6789-LABEL: sample_c_b_cl_1d: 1129; GFX6789: ; %bb.0: ; %main_body 1130; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1131; GFX6789-NEXT: s_wqm_b64 exec, exec 1132; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1133; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1134; GFX6789-NEXT: s_waitcnt vmcnt(0) 1135; GFX6789-NEXT: ; return to shader part epilog 1136; 1137; GFX10PLUS-LABEL: sample_c_b_cl_1d: 1138; GFX10PLUS: ; %bb.0: ; %main_body 1139; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1140; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1141; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1142; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1143; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1144; GFX10PLUS-NEXT: ; return to shader part epilog 1145main_body: 1146 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1147 ret <4 x float> %v 1148} 1149 1150define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { 1151; VERDE-LABEL: sample_c_b_cl_2d: 1152; VERDE: ; %bb.0: ; %main_body 1153; VERDE-NEXT: s_mov_b64 s[12:13], exec 1154; VERDE-NEXT: s_wqm_b64 exec, exec 1155; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1156; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1157; VERDE-NEXT: s_waitcnt vmcnt(0) 1158; VERDE-NEXT: ; return to shader part epilog 1159; 1160; GFX6789-LABEL: sample_c_b_cl_2d: 1161; GFX6789: ; %bb.0: ; %main_body 1162; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1163; 
GFX6789-NEXT: s_wqm_b64 exec, exec 1164; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1165; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1166; GFX6789-NEXT: s_waitcnt vmcnt(0) 1167; GFX6789-NEXT: ; return to shader part epilog 1168; 1169; GFX10PLUS-LABEL: sample_c_b_cl_2d: 1170; GFX10PLUS: ; %bb.0: ; %main_body 1171; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1172; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1173; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1174; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1175; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1176; GFX10PLUS-NEXT: ; return to shader part epilog 1177main_body: 1178 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1179 ret <4 x float> %v 1180} 1181 1182define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { 1183; VERDE-LABEL: sample_d_1d: 1184; VERDE: ; %bb.0: ; %main_body 1185; VERDE-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1186; VERDE-NEXT: s_waitcnt vmcnt(0) 1187; VERDE-NEXT: ; return to shader part epilog 1188; 1189; GFX6789-LABEL: sample_d_1d: 1190; GFX6789: ; %bb.0: ; %main_body 1191; GFX6789-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1192; GFX6789-NEXT: s_waitcnt vmcnt(0) 1193; GFX6789-NEXT: ; return to shader part epilog 1194; 1195; GFX10PLUS-LABEL: sample_d_1d: 1196; GFX10PLUS: ; %bb.0: ; %main_body 1197; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1198; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1199; GFX10PLUS-NEXT: ; return to shader part epilog 1200main_body: 1201 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 
0) 1202 ret <4 x float> %v 1203} 1204 1205define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { 1206; VERDE-LABEL: sample_d_2d: 1207; VERDE: ; %bb.0: ; %main_body 1208; VERDE-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf 1209; VERDE-NEXT: s_waitcnt vmcnt(0) 1210; VERDE-NEXT: ; return to shader part epilog 1211; 1212; GFX6789-LABEL: sample_d_2d: 1213; GFX6789: ; %bb.0: ; %main_body 1214; GFX6789-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf 1215; GFX6789-NEXT: s_waitcnt vmcnt(0) 1216; GFX6789-NEXT: ; return to shader part epilog 1217; 1218; GFX10PLUS-LABEL: sample_d_2d: 1219; GFX10PLUS: ; %bb.0: ; %main_body 1220; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1221; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1222; GFX10PLUS-NEXT: ; return to shader part epilog 1223main_body: 1224 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1225 ret <4 x float> %v 1226} 1227 1228define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { 1229; VERDE-LABEL: sample_c_d_1d: 1230; VERDE: ; %bb.0: ; %main_body 1231; VERDE-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1232; VERDE-NEXT: s_waitcnt vmcnt(0) 1233; VERDE-NEXT: ; return to shader part epilog 1234; 1235; GFX6789-LABEL: sample_c_d_1d: 1236; GFX6789: ; %bb.0: ; %main_body 1237; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1238; GFX6789-NEXT: s_waitcnt vmcnt(0) 1239; GFX6789-NEXT: ; return to shader part epilog 1240; 1241; GFX10PLUS-LABEL: sample_c_d_1d: 1242; GFX10PLUS: ; %bb.0: ; %main_body 1243; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 
dim:SQ_RSRC_IMG_1D 1244; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1245; GFX10PLUS-NEXT: ; return to shader part epilog 1246main_body: 1247 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1248 ret <4 x float> %v 1249} 1250 1251define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { 1252; VERDE-LABEL: sample_c_d_2d: 1253; VERDE: ; %bb.0: ; %main_body 1254; VERDE-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1255; VERDE-NEXT: s_waitcnt vmcnt(0) 1256; VERDE-NEXT: ; return to shader part epilog 1257; 1258; GFX6789-LABEL: sample_c_d_2d: 1259; GFX6789: ; %bb.0: ; %main_body 1260; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1261; GFX6789-NEXT: s_waitcnt vmcnt(0) 1262; GFX6789-NEXT: ; return to shader part epilog 1263; 1264; GFX10PLUS-LABEL: sample_c_d_2d: 1265; GFX10PLUS: ; %bb.0: ; %main_body 1266; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1267; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1268; GFX10PLUS-NEXT: ; return to shader part epilog 1269main_body: 1270 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1271 ret <4 x float> %v 1272} 1273 1274define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { 1275; VERDE-LABEL: sample_d_cl_1d: 1276; VERDE: ; %bb.0: ; %main_body 1277; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1278; VERDE-NEXT: s_waitcnt vmcnt(0) 1279; VERDE-NEXT: ; return to shader part epilog 1280; 1281; GFX6789-LABEL: sample_d_cl_1d: 1282; 
GFX6789: ; %bb.0: ; %main_body 1283; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1284; GFX6789-NEXT: s_waitcnt vmcnt(0) 1285; GFX6789-NEXT: ; return to shader part epilog 1286; 1287; GFX10PLUS-LABEL: sample_d_cl_1d: 1288; GFX10PLUS: ; %bb.0: ; %main_body 1289; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1290; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1291; GFX10PLUS-NEXT: ; return to shader part epilog 1292main_body: 1293 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1294 ret <4 x float> %v 1295} 1296 1297define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { 1298; VERDE-LABEL: sample_d_cl_2d: 1299; VERDE: ; %bb.0: ; %main_body 1300; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1301; VERDE-NEXT: s_waitcnt vmcnt(0) 1302; VERDE-NEXT: ; return to shader part epilog 1303; 1304; GFX6789-LABEL: sample_d_cl_2d: 1305; GFX6789: ; %bb.0: ; %main_body 1306; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1307; GFX6789-NEXT: s_waitcnt vmcnt(0) 1308; GFX6789-NEXT: ; return to shader part epilog 1309; 1310; GFX10PLUS-LABEL: sample_d_cl_2d: 1311; GFX10PLUS: ; %bb.0: ; %main_body 1312; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1313; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1314; GFX10PLUS-NEXT: ; return to shader part epilog 1315main_body: 1316 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1317 ret <4 x float> %v 1318} 1319 1320define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> 
inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { 1321; VERDE-LABEL: sample_c_d_cl_1d: 1322; VERDE: ; %bb.0: ; %main_body 1323; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1324; VERDE-NEXT: s_waitcnt vmcnt(0) 1325; VERDE-NEXT: ; return to shader part epilog 1326; 1327; GFX6789-LABEL: sample_c_d_cl_1d: 1328; GFX6789: ; %bb.0: ; %main_body 1329; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1330; GFX6789-NEXT: s_waitcnt vmcnt(0) 1331; GFX6789-NEXT: ; return to shader part epilog 1332; 1333; GFX10PLUS-LABEL: sample_c_d_cl_1d: 1334; GFX10PLUS: ; %bb.0: ; %main_body 1335; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1336; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1337; GFX10PLUS-NEXT: ; return to shader part epilog 1338main_body: 1339 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1340 ret <4 x float> %v 1341} 1342 1343define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { 1344; VERDE-LABEL: sample_c_d_cl_2d: 1345; VERDE: ; %bb.0: ; %main_body 1346; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf 1347; VERDE-NEXT: s_waitcnt vmcnt(0) 1348; VERDE-NEXT: ; return to shader part epilog 1349; 1350; GFX6789-LABEL: sample_c_d_cl_2d: 1351; GFX6789: ; %bb.0: ; %main_body 1352; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf 1353; GFX6789-NEXT: s_waitcnt vmcnt(0) 1354; GFX6789-NEXT: ; return to shader part epilog 1355; 1356; GFX10PLUS-LABEL: sample_c_d_cl_2d: 1357; GFX10PLUS: ; %bb.0: ; %main_body 1358; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf 
dim:SQ_RSRC_IMG_2D 1359; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1360; GFX10PLUS-NEXT: ; return to shader part epilog 1361main_body: 1362 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1363 ret <4 x float> %v 1364} 1365 1366define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { 1367; VERDE-LABEL: sample_l_1d: 1368; VERDE: ; %bb.0: ; %main_body 1369; VERDE-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1370; VERDE-NEXT: s_waitcnt vmcnt(0) 1371; VERDE-NEXT: ; return to shader part epilog 1372; 1373; GFX6789-LABEL: sample_l_1d: 1374; GFX6789: ; %bb.0: ; %main_body 1375; GFX6789-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1376; GFX6789-NEXT: s_waitcnt vmcnt(0) 1377; GFX6789-NEXT: ; return to shader part epilog 1378; 1379; GFX10PLUS-LABEL: sample_l_1d: 1380; GFX10PLUS: ; %bb.0: ; %main_body 1381; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1382; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1383; GFX10PLUS-NEXT: ; return to shader part epilog 1384main_body: 1385 %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1386 ret <4 x float> %v 1387} 1388 1389define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { 1390; VERDE-LABEL: sample_l_2d: 1391; VERDE: ; %bb.0: ; %main_body 1392; VERDE-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1393; VERDE-NEXT: s_waitcnt vmcnt(0) 1394; VERDE-NEXT: ; return to shader part epilog 1395; 1396; GFX6789-LABEL: sample_l_2d: 1397; GFX6789: ; %bb.0: ; %main_body 1398; GFX6789-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1399; GFX6789-NEXT: 
s_waitcnt vmcnt(0) 1400; GFX6789-NEXT: ; return to shader part epilog 1401; 1402; GFX10PLUS-LABEL: sample_l_2d: 1403; GFX10PLUS: ; %bb.0: ; %main_body 1404; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1405; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1406; GFX10PLUS-NEXT: ; return to shader part epilog 1407main_body: 1408 %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1409 ret <4 x float> %v 1410} 1411 1412define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { 1413; VERDE-LABEL: sample_c_l_1d: 1414; VERDE: ; %bb.0: ; %main_body 1415; VERDE-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1416; VERDE-NEXT: s_waitcnt vmcnt(0) 1417; VERDE-NEXT: ; return to shader part epilog 1418; 1419; GFX6789-LABEL: sample_c_l_1d: 1420; GFX6789: ; %bb.0: ; %main_body 1421; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1422; GFX6789-NEXT: s_waitcnt vmcnt(0) 1423; GFX6789-NEXT: ; return to shader part epilog 1424; 1425; GFX10PLUS-LABEL: sample_c_l_1d: 1426; GFX10PLUS: ; %bb.0: ; %main_body 1427; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1428; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1429; GFX10PLUS-NEXT: ; return to shader part epilog 1430main_body: 1431 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1432 ret <4 x float> %v 1433} 1434 1435define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { 1436; VERDE-LABEL: sample_c_l_2d: 1437; VERDE: ; %bb.0: ; %main_body 1438; VERDE-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1439; VERDE-NEXT: s_waitcnt vmcnt(0) 
1440; VERDE-NEXT: ; return to shader part epilog 1441; 1442; GFX6789-LABEL: sample_c_l_2d: 1443; GFX6789: ; %bb.0: ; %main_body 1444; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1445; GFX6789-NEXT: s_waitcnt vmcnt(0) 1446; GFX6789-NEXT: ; return to shader part epilog 1447; 1448; GFX10PLUS-LABEL: sample_c_l_2d: 1449; GFX10PLUS: ; %bb.0: ; %main_body 1450; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1451; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1452; GFX10PLUS-NEXT: ; return to shader part epilog 1453main_body: 1454 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1455 ret <4 x float> %v 1456} 1457 1458define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1459; VERDE-LABEL: sample_lz_1d: 1460; VERDE: ; %bb.0: ; %main_body 1461; VERDE-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf 1462; VERDE-NEXT: s_waitcnt vmcnt(0) 1463; VERDE-NEXT: ; return to shader part epilog 1464; 1465; GFX6789-LABEL: sample_lz_1d: 1466; GFX6789: ; %bb.0: ; %main_body 1467; GFX6789-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf 1468; GFX6789-NEXT: s_waitcnt vmcnt(0) 1469; GFX6789-NEXT: ; return to shader part epilog 1470; 1471; GFX10PLUS-LABEL: sample_lz_1d: 1472; GFX10PLUS: ; %bb.0: ; %main_body 1473; GFX10PLUS-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1474; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1475; GFX10PLUS-NEXT: ; return to shader part epilog 1476main_body: 1477 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1478 ret <4 x float> %v 1479} 1480 1481define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 1482; VERDE-LABEL: sample_lz_2d: 1483; 
VERDE: ; %bb.0: ; %main_body 1484; VERDE-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1485; VERDE-NEXT: s_waitcnt vmcnt(0) 1486; VERDE-NEXT: ; return to shader part epilog 1487; 1488; GFX6789-LABEL: sample_lz_2d: 1489; GFX6789: ; %bb.0: ; %main_body 1490; GFX6789-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1491; GFX6789-NEXT: s_waitcnt vmcnt(0) 1492; GFX6789-NEXT: ; return to shader part epilog 1493; 1494; GFX10PLUS-LABEL: sample_lz_2d: 1495; GFX10PLUS: ; %bb.0: ; %main_body 1496; GFX10PLUS-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1497; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1498; GFX10PLUS-NEXT: ; return to shader part epilog 1499main_body: 1500 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1501 ret <4 x float> %v 1502} 1503 1504define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { 1505; VERDE-LABEL: sample_c_lz_1d: 1506; VERDE: ; %bb.0: ; %main_body 1507; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1508; VERDE-NEXT: s_waitcnt vmcnt(0) 1509; VERDE-NEXT: ; return to shader part epilog 1510; 1511; GFX6789-LABEL: sample_c_lz_1d: 1512; GFX6789: ; %bb.0: ; %main_body 1513; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1514; GFX6789-NEXT: s_waitcnt vmcnt(0) 1515; GFX6789-NEXT: ; return to shader part epilog 1516; 1517; GFX10PLUS-LABEL: sample_c_lz_1d: 1518; GFX10PLUS: ; %bb.0: ; %main_body 1519; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1520; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1521; GFX10PLUS-NEXT: ; return to shader part epilog 1522main_body: 1523 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1524 ret <4 x float> %v 
1525} 1526 1527define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { 1528; VERDE-LABEL: sample_c_lz_2d: 1529; VERDE: ; %bb.0: ; %main_body 1530; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1531; VERDE-NEXT: s_waitcnt vmcnt(0) 1532; VERDE-NEXT: ; return to shader part epilog 1533; 1534; GFX6789-LABEL: sample_c_lz_2d: 1535; GFX6789: ; %bb.0: ; %main_body 1536; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1537; GFX6789-NEXT: s_waitcnt vmcnt(0) 1538; GFX6789-NEXT: ; return to shader part epilog 1539; 1540; GFX10PLUS-LABEL: sample_c_lz_2d: 1541; GFX10PLUS: ; %bb.0: ; %main_body 1542; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1543; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1544; GFX10PLUS-NEXT: ; return to shader part epilog 1545main_body: 1546 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1547 ret <4 x float> %v 1548} 1549 1550define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { 1551; VERDE-LABEL: sample_c_d_o_2darray_V1: 1552; VERDE: ; %bb.0: ; %main_body 1553; VERDE-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da 1554; VERDE-NEXT: s_waitcnt vmcnt(0) 1555; VERDE-NEXT: ; return to shader part epilog 1556; 1557; GFX6789-LABEL: sample_c_d_o_2darray_V1: 1558; GFX6789: ; %bb.0: ; %main_body 1559; GFX6789-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da 1560; GFX6789-NEXT: s_waitcnt vmcnt(0) 1561; GFX6789-NEXT: ; return to shader part epilog 1562; 1563; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1: 1564; GFX10PLUS: ; %bb.0: ; %main_body 1565; GFX10PLUS-NEXT: image_sample_c_d_o v0, v[0:15], 
s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY 1566; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1567; GFX10PLUS-NEXT: ; return to shader part epilog 1568main_body: 1569 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1570 ret float %v 1571} 1572 1573define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) { 1574; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe: 1575; VERDE: ; %bb.0: ; %main_body 1576; VERDE-NEXT: v_mov_b32_e32 v9, 0 1577; VERDE-NEXT: v_mov_b32_e32 v10, v9 1578; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da 1579; VERDE-NEXT: s_mov_b32 s15, 0xf000 1580; VERDE-NEXT: s_mov_b32 s14, -1 1581; VERDE-NEXT: s_waitcnt vmcnt(0) 1582; VERDE-NEXT: v_mov_b32_e32 v0, v9 1583; VERDE-NEXT: buffer_store_dword v10, off, s[12:15], 0 1584; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1585; VERDE-NEXT: ; return to shader part epilog 1586; 1587; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe: 1588; GFX6789: ; %bb.0: ; %main_body 1589; GFX6789-NEXT: v_mov_b32_e32 v11, 0 1590; GFX6789-NEXT: v_mov_b32_e32 v12, v11 1591; GFX6789-NEXT: v_mov_b32_e32 v9, v11 1592; GFX6789-NEXT: v_mov_b32_e32 v10, v12 1593; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da 1594; GFX6789-NEXT: s_waitcnt vmcnt(0) 1595; GFX6789-NEXT: v_mov_b32_e32 v0, v9 1596; GFX6789-NEXT: global_store_dword v11, v10, s[12:13] 1597; GFX6789-NEXT: s_waitcnt vmcnt(0) 1598; GFX6789-NEXT: ; return to shader part epilog 1599; 1600; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe: 1601; GFX10: ; %bb.0: ; %main_body 1602; GFX10-NEXT: v_mov_b32_e32 v11, 0 1603; GFX10-NEXT: v_mov_b32_e32 v12, v11 1604; 
GFX10-NEXT: v_mov_b32_e32 v9, v11 1605; GFX10-NEXT: v_mov_b32_e32 v10, v12 1606; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe 1607; GFX10-NEXT: s_waitcnt vmcnt(0) 1608; GFX10-NEXT: v_mov_b32_e32 v0, v9 1609; GFX10-NEXT: global_store_dword v11, v10, s[12:13] 1610; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1611; GFX10-NEXT: ; return to shader part epilog 1612; 1613; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe: 1614; GFX11: ; %bb.0: ; %main_body 1615; GFX11-NEXT: v_mov_b32_e32 v11, 0 1616; GFX11-NEXT: v_mov_b32_e32 v12, v11 1617; GFX11-NEXT: v_mov_b32_e32 v9, v11 1618; GFX11-NEXT: v_mov_b32_e32 v10, v12 1619; GFX11-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe 1620; GFX11-NEXT: s_waitcnt vmcnt(0) 1621; GFX11-NEXT: v_mov_b32_e32 v0, v9 1622; GFX11-NEXT: global_store_b32 v11, v10, s[12:13] 1623; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1624; GFX11-NEXT: ; return to shader part epilog 1625main_body: 1626 %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 1627 %v.vec = extractvalue {float, i32} %v, 0 1628 %v.err = extractvalue {float, i32} %v, 1 1629 store i32 %v.err, i32 addrspace(1)* %out, align 4 1630 ret float %v.vec 1631} 1632 1633define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { 1634; VERDE-LABEL: sample_c_d_o_2darray_V2: 1635; VERDE: ; %bb.0: ; %main_body 1636; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da 1637; VERDE-NEXT: s_waitcnt vmcnt(0) 1638; VERDE-NEXT: ; return to shader part epilog 1639; 1640; GFX6789-LABEL: sample_c_d_o_2darray_V2: 1641; GFX6789: ; %bb.0: ; 
%main_body 1642; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da 1643; GFX6789-NEXT: s_waitcnt vmcnt(0) 1644; GFX6789-NEXT: ; return to shader part epilog 1645; 1646; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2: 1647; GFX10PLUS: ; %bb.0: ; %main_body 1648; GFX10PLUS-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY 1649; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1650; GFX10PLUS-NEXT: ; return to shader part epilog 1651main_body: 1652 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1653 ret <2 x float> %v 1654} 1655 1656define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { 1657; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe: 1658; VERDE: ; %bb.0: ; %main_body 1659; VERDE-NEXT: v_mov_b32_e32 v9, 0 1660; VERDE-NEXT: v_mov_b32_e32 v10, v9 1661; VERDE-NEXT: v_mov_b32_e32 v11, v9 1662; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da 1663; VERDE-NEXT: s_waitcnt vmcnt(0) 1664; VERDE-NEXT: v_mov_b32_e32 v0, v9 1665; VERDE-NEXT: v_mov_b32_e32 v1, v10 1666; VERDE-NEXT: v_mov_b32_e32 v2, v11 1667; VERDE-NEXT: ; return to shader part epilog 1668; 1669; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe: 1670; GFX6789: ; %bb.0: ; %main_body 1671; GFX6789-NEXT: v_mov_b32_e32 v9, 0 1672; GFX6789-NEXT: v_mov_b32_e32 v10, v9 1673; GFX6789-NEXT: v_mov_b32_e32 v11, v9 1674; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da 1675; GFX6789-NEXT: s_waitcnt vmcnt(0) 1676; GFX6789-NEXT: v_mov_b32_e32 v0, v9 1677; GFX6789-NEXT: v_mov_b32_e32 v1, v10 1678; GFX6789-NEXT: v_mov_b32_e32 v2, v11 1679; 
GFX6789-NEXT: ; return to shader part epilog 1680; 1681; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2_tfe: 1682; GFX10PLUS: ; %bb.0: ; %main_body 1683; GFX10PLUS-NEXT: v_mov_b32_e32 v9, 0 1684; GFX10PLUS-NEXT: v_mov_b32_e32 v10, v9 1685; GFX10PLUS-NEXT: v_mov_b32_e32 v11, v9 1686; GFX10PLUS-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe 1687; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1688; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v9 1689; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v10 1690; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v11 1691; GFX10PLUS-NEXT: ; return to shader part epilog 1692main_body: 1693 %v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 1694 %v.vec = extractvalue {<2 x float>, i32} %v, 0 1695 %v.f1 = extractelement <2 x float> %v.vec, i32 0 1696 %v.f2 = extractelement <2 x float> %v.vec, i32 1 1697 %v.err = extractvalue {<2 x float>, i32} %v, 1 1698 %v.errf = bitcast i32 %v.err to float 1699 %res.0 = insertelement <4 x float> undef, float %v.f1, i32 0 1700 %res.1 = insertelement <4 x float> %res.0, float %v.f2, i32 1 1701 %res.2 = insertelement <4 x float> %res.1, float %v.errf, i32 2 1702 ret <4 x float> %res.2 1703} 1704 1705define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1706; VERDE-LABEL: sample_1d_unorm: 1707; VERDE: ; %bb.0: ; %main_body 1708; VERDE-NEXT: s_mov_b64 s[12:13], exec 1709; VERDE-NEXT: s_wqm_b64 exec, exec 1710; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1711; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm 1712; VERDE-NEXT: s_waitcnt vmcnt(0) 1713; VERDE-NEXT: ; return to shader part epilog 1714; 1715; GFX6789-LABEL: sample_1d_unorm: 1716; GFX6789: ; %bb.0: ; %main_body 1717; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1718; 
GFX6789-NEXT: s_wqm_b64 exec, exec 1719; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1720; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm 1721; GFX6789-NEXT: s_waitcnt vmcnt(0) 1722; GFX6789-NEXT: ; return to shader part epilog 1723; 1724; GFX10PLUS-LABEL: sample_1d_unorm: 1725; GFX10PLUS: ; %bb.0: ; %main_body 1726; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1727; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1728; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1729; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm 1730; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1731; GFX10PLUS-NEXT: ; return to shader part epilog 1732main_body: 1733 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0) 1734 ret <4 x float> %v 1735} 1736 1737define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1738; VERDE-LABEL: sample_1d_glc: 1739; VERDE: ; %bb.0: ; %main_body 1740; VERDE-NEXT: s_mov_b64 s[12:13], exec 1741; VERDE-NEXT: s_wqm_b64 exec, exec 1742; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1743; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc 1744; VERDE-NEXT: s_waitcnt vmcnt(0) 1745; VERDE-NEXT: ; return to shader part epilog 1746; 1747; GFX6789-LABEL: sample_1d_glc: 1748; GFX6789: ; %bb.0: ; %main_body 1749; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1750; GFX6789-NEXT: s_wqm_b64 exec, exec 1751; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1752; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc 1753; GFX6789-NEXT: s_waitcnt vmcnt(0) 1754; GFX6789-NEXT: ; return to shader part epilog 1755; 1756; GFX10PLUS-LABEL: sample_1d_glc: 1757; GFX10PLUS: ; %bb.0: ; %main_body 1758; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1759; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1760; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1761; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], 
s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc 1762; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1763; GFX10PLUS-NEXT: ; return to shader part epilog 1764main_body: 1765 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1) 1766 ret <4 x float> %v 1767} 1768 1769define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1770; VERDE-LABEL: sample_1d_slc: 1771; VERDE: ; %bb.0: ; %main_body 1772; VERDE-NEXT: s_mov_b64 s[12:13], exec 1773; VERDE-NEXT: s_wqm_b64 exec, exec 1774; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1775; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc 1776; VERDE-NEXT: s_waitcnt vmcnt(0) 1777; VERDE-NEXT: ; return to shader part epilog 1778; 1779; GFX6789-LABEL: sample_1d_slc: 1780; GFX6789: ; %bb.0: ; %main_body 1781; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1782; GFX6789-NEXT: s_wqm_b64 exec, exec 1783; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1784; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc 1785; GFX6789-NEXT: s_waitcnt vmcnt(0) 1786; GFX6789-NEXT: ; return to shader part epilog 1787; 1788; GFX10PLUS-LABEL: sample_1d_slc: 1789; GFX10PLUS: ; %bb.0: ; %main_body 1790; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1791; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1792; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1793; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc 1794; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1795; GFX10PLUS-NEXT: ; return to shader part epilog 1796main_body: 1797 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2) 1798 ret <4 x float> %v 1799} 1800 1801define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1802; VERDE-LABEL: sample_1d_glc_slc: 1803; VERDE: ; %bb.0: ; %main_body 1804; VERDE-NEXT: s_mov_b64 s[12:13], 
exec 1805; VERDE-NEXT: s_wqm_b64 exec, exec 1806; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1807; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc 1808; VERDE-NEXT: s_waitcnt vmcnt(0) 1809; VERDE-NEXT: ; return to shader part epilog 1810; 1811; GFX6789-LABEL: sample_1d_glc_slc: 1812; GFX6789: ; %bb.0: ; %main_body 1813; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1814; GFX6789-NEXT: s_wqm_b64 exec, exec 1815; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1816; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc 1817; GFX6789-NEXT: s_waitcnt vmcnt(0) 1818; GFX6789-NEXT: ; return to shader part epilog 1819; 1820; GFX10PLUS-LABEL: sample_1d_glc_slc: 1821; GFX10PLUS: ; %bb.0: ; %main_body 1822; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1823; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1824; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1825; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc 1826; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1827; GFX10PLUS-NEXT: ; return to shader part epilog 1828main_body: 1829 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3) 1830 ret <4 x float> %v 1831} 1832 1833define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1834; VERDE-LABEL: adjust_writemask_sample_0: 1835; VERDE: ; %bb.0: ; %main_body 1836; VERDE-NEXT: s_mov_b64 s[12:13], exec 1837; VERDE-NEXT: s_wqm_b64 exec, exec 1838; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1839; VERDE-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 1840; VERDE-NEXT: s_waitcnt vmcnt(0) 1841; VERDE-NEXT: ; return to shader part epilog 1842; 1843; GFX6789-LABEL: adjust_writemask_sample_0: 1844; GFX6789: ; %bb.0: ; %main_body 1845; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1846; GFX6789-NEXT: s_wqm_b64 exec, exec 1847; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1848; GFX6789-NEXT: image_sample 
v0, v0, s[0:7], s[8:11] dmask:0x1 1849; GFX6789-NEXT: s_waitcnt vmcnt(0) 1850; GFX6789-NEXT: ; return to shader part epilog 1851; 1852; GFX10PLUS-LABEL: adjust_writemask_sample_0: 1853; GFX10PLUS: ; %bb.0: ; %main_body 1854; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1855; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1856; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1857; GFX10PLUS-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D 1858; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1859; GFX10PLUS-NEXT: ; return to shader part epilog 1860main_body: 1861 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1862 %elt0 = extractelement <4 x float> %r, i32 0 1863 ret float %elt0 1864} 1865 1866define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1867; VERDE-LABEL: adjust_writemask_sample_01: 1868; VERDE: ; %bb.0: ; %main_body 1869; VERDE-NEXT: s_mov_b64 s[12:13], exec 1870; VERDE-NEXT: s_wqm_b64 exec, exec 1871; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1872; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 1873; VERDE-NEXT: s_waitcnt vmcnt(0) 1874; VERDE-NEXT: ; return to shader part epilog 1875; 1876; GFX6789-LABEL: adjust_writemask_sample_01: 1877; GFX6789: ; %bb.0: ; %main_body 1878; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1879; GFX6789-NEXT: s_wqm_b64 exec, exec 1880; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1881; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 1882; GFX6789-NEXT: s_waitcnt vmcnt(0) 1883; GFX6789-NEXT: ; return to shader part epilog 1884; 1885; GFX10PLUS-LABEL: adjust_writemask_sample_01: 1886; GFX10PLUS: ; %bb.0: ; %main_body 1887; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1888; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1889; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1890; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D 
1891; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1892; GFX10PLUS-NEXT: ; return to shader part epilog 1893main_body: 1894 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1895 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1> 1896 ret <2 x float> %out 1897} 1898 1899define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1900; VERDE-LABEL: adjust_writemask_sample_012: 1901; VERDE: ; %bb.0: ; %main_body 1902; VERDE-NEXT: s_mov_b64 s[12:13], exec 1903; VERDE-NEXT: s_wqm_b64 exec, exec 1904; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1905; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 1906; VERDE-NEXT: s_waitcnt vmcnt(0) 1907; VERDE-NEXT: ; return to shader part epilog 1908; 1909; GFX6789-LABEL: adjust_writemask_sample_012: 1910; GFX6789: ; %bb.0: ; %main_body 1911; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1912; GFX6789-NEXT: s_wqm_b64 exec, exec 1913; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1914; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 1915; GFX6789-NEXT: s_waitcnt vmcnt(0) 1916; GFX6789-NEXT: ; return to shader part epilog 1917; 1918; GFX10PLUS-LABEL: adjust_writemask_sample_012: 1919; GFX10PLUS: ; %bb.0: ; %main_body 1920; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1921; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1922; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1923; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D 1924; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1925; GFX10PLUS-NEXT: ; return to shader part epilog 1926main_body: 1927 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1928 %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 1929 ret <3 x float> %out 1930} 1931 1932define amdgpu_ps <2 x float> 
@adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1933; VERDE-LABEL: adjust_writemask_sample_12: 1934; VERDE: ; %bb.0: ; %main_body 1935; VERDE-NEXT: s_mov_b64 s[12:13], exec 1936; VERDE-NEXT: s_wqm_b64 exec, exec 1937; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1938; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 1939; VERDE-NEXT: s_waitcnt vmcnt(0) 1940; VERDE-NEXT: ; return to shader part epilog 1941; 1942; GFX6789-LABEL: adjust_writemask_sample_12: 1943; GFX6789: ; %bb.0: ; %main_body 1944; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1945; GFX6789-NEXT: s_wqm_b64 exec, exec 1946; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1947; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 1948; GFX6789-NEXT: s_waitcnt vmcnt(0) 1949; GFX6789-NEXT: ; return to shader part epilog 1950; 1951; GFX10PLUS-LABEL: adjust_writemask_sample_12: 1952; GFX10PLUS: ; %bb.0: ; %main_body 1953; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1954; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1955; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1956; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D 1957; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1958; GFX10PLUS-NEXT: ; return to shader part epilog 1959main_body: 1960 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1961 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2> 1962 ret <2 x float> %out 1963} 1964 1965define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1966; VERDE-LABEL: adjust_writemask_sample_03: 1967; VERDE: ; %bb.0: ; %main_body 1968; VERDE-NEXT: s_mov_b64 s[12:13], exec 1969; VERDE-NEXT: s_wqm_b64 exec, exec 1970; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1971; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 1972; VERDE-NEXT: s_waitcnt vmcnt(0) 1973; 
VERDE-NEXT: ; return to shader part epilog 1974; 1975; GFX6789-LABEL: adjust_writemask_sample_03: 1976; GFX6789: ; %bb.0: ; %main_body 1977; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1978; GFX6789-NEXT: s_wqm_b64 exec, exec 1979; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1980; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 1981; GFX6789-NEXT: s_waitcnt vmcnt(0) 1982; GFX6789-NEXT: ; return to shader part epilog 1983; 1984; GFX10PLUS-LABEL: adjust_writemask_sample_03: 1985; GFX10PLUS: ; %bb.0: ; %main_body 1986; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1987; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1988; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1989; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D 1990; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1991; GFX10PLUS-NEXT: ; return to shader part epilog 1992main_body: 1993 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1994 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3> 1995 ret <2 x float> %out 1996} 1997 1998define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1999; VERDE-LABEL: adjust_writemask_sample_13: 2000; VERDE: ; %bb.0: ; %main_body 2001; VERDE-NEXT: s_mov_b64 s[12:13], exec 2002; VERDE-NEXT: s_wqm_b64 exec, exec 2003; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2004; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa 2005; VERDE-NEXT: s_waitcnt vmcnt(0) 2006; VERDE-NEXT: ; return to shader part epilog 2007; 2008; GFX6789-LABEL: adjust_writemask_sample_13: 2009; GFX6789: ; %bb.0: ; %main_body 2010; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2011; GFX6789-NEXT: s_wqm_b64 exec, exec 2012; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2013; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa 2014; GFX6789-NEXT: s_waitcnt vmcnt(0) 2015; GFX6789-NEXT: ; return to 
shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_13:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
  ret <2 x float> %out
}

; The call requests all four channels (first operand 15) but only result
; elements 1, 2 and 3 are returned; the expected image_sample carries
; dmask:0xe and writes three registers.
define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_123:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_123:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_123:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %sampled = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %kept = shufflevector <4 x float> %sampled, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %kept
}

; The call passes 0 as its first operand; the expected output for every
; target contains no instructions before the epilog comment.
define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_none_enabled:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_none_enabled:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_none_enabled:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %sampled = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %sampled
}

; The call passes 14 (0xe) as its first operand and only result elements 0
; and 1 are returned; the expected image_sample carries dmask:0x6 and writes
; two registers.
define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_123_to_12:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_123_to_12:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_123_to_12:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %sampled = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %kept = shufflevector <4 x float> %sampled, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %kept
}

; The call passes 11 (0xb) as its first operand and only result elements 1
; and 2 are returned; the expected image_sample carries dmask:0xa and writes
; two registers.
define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_013_to_13:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_013_to_13:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_013_to_13:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %sampled = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %kept = shufflevector <4 x float> %sampled, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %kept
}

; Plain sample declarations, one per dimension suffix. The second 1d entry
; returns a {vector, i32} pair instead of a bare vector.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.c / sample.cl / sample.c.cl declarations for 1d and 2d.
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.b family declarations for 1d and 2d (names carry two .f32 suffixes).
declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.d family declarations for 1d and 2d (names carry two .f32 suffixes).
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.l family declarations for 1d and 2d.
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.lz family declarations for 1d and 2d.
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.c.d.o.2darray declared with four return types: float, {float, i32},
; <2 x float> and {<2 x float>, i32}. Each takes two leading i32 operands.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Attribute groups. Every declaration in this group of declarations uses #1;
; #0 and #2 are not referenced here and may be used elsewhere in the file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }