; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; Code generation test for the llvm.amdgcn.image.sample.*d* intrinsics when the
; derivative operands are `half` and the coordinate operands are `float`.
; Every function below is expected to select the matching image_sample_*_g16
; instruction. Both llc invocations (gfx1010 and gfx1100) are matched against
; the same set of check lines.
;
; Intrinsic name suffixes follow <return type>.<derivative type>.<coordinate type>,
; e.g. v4f32.f16.f32 for a <4 x float> result with half derivatives and float
; coordinates.

; 1D: two half derivatives and one float coordinate. The expected output is the
; sample instruction alone, addressing v[0:2].
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D: four half derivatives. The expected code combines each pair into one
; 32-bit register (v_and_b32 with 0xffff, then v_lshl_or_b32 by 16) before the
; sample instruction.
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 3D: six half derivatives and three float coordinates. The expected sample
; instruction reads its address from the contiguous range v[2:8].
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
; GFX10-LABEL: sample_d_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, v3
; GFX10-NEXT:    v_mov_b32_e32 v3, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v9
; GFX10-NEXT:    v_lshl_or_b32 v4, v4, 16, v2
; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D with a leading float %zcompare operand (the .c variant).
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_c_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D with a leading float %zcompare operand (the .c variant).
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D with a trailing float %clamp operand (the .cl variant).
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D with a trailing float %clamp operand (the .cl variant).
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D with both %zcompare and %clamp operands (the .c.d.cl variant).
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D with both %zcompare and %clamp operands. The expected sample instruction
; reads its address from the contiguous range v[2:7].
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v3
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v4, v4, 16, v0
; GFX10-NEXT:    v_lshl_or_b32 v3, v8, 16, v1
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D array with i32 %offset and float %zcompare, returning a single float.
; The dmask operand is 4 and the expected result register is the single v0.
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V1:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, v2
; GFX10-NEXT:    v_mov_b32_e32 v10, v3
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v4
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v9
; GFX10-NEXT:    v_lshl_or_b32 v5, v5, 16, v0
; GFX10-NEXT:    v_lshl_or_b32 v4, v10, 16, v1
; GFX10-NEXT:    image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}

; Same operands as sample_c_d_o_2darray_V1 but returning <2 x float>.
; The dmask operand is 6 and the expected result registers are v[0:1].
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V2:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, v2
; GFX10-NEXT:    v_mov_b32_e32 v10, v3
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v4
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v9
; GFX10-NEXT:    v_lshl_or_b32 v5, v5, 16, v0
; GFX10-NEXT:    v_lshl_or_b32 v4, v10, 16, v1
; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}

; Intrinsic declarations, in the same order as the functions above.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Only attribute group #1 is referenced by the declarations above.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }