; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Code generation test for the llvm.amdgcn.image.sample.cd.* intrinsics
; (cd, c.cd, cd.cl and c.cd.cl, each in 1D and 2D) when the derivative operands
; are 'half' and the coordinate operands are 'float'.
;
; Every function below expects a *_g16 form of the image sample instruction.
; In the 2D variants the expected output first combines register pairs with
; v_and_b32 / v_lshl_or_b32 (presumably packing two half derivatives into one
; 32-bit register) before issuing the sample.
;
; The check lines are autogenerated (see the NOTE on the first line); rerun the
; script instead of editing them by hand.

; cd, 1D: two half derivatives followed by one float coordinate.
define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; cd, 2D: four half derivatives followed by two float coordinates.
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; c.cd, 1D: a float %zcompare operand precedes the half derivatives.
define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_c_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; c.cd, 2D: float %zcompare, four half derivatives, two float coordinates.
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; cd.cl, 1D: a trailing float %clamp operand follows the coordinate.
define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; cd.cl, 2D: four half derivatives, two float coordinates, float %clamp.
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; c.cd.cl, 1D: float %zcompare first, float %clamp last.
define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_c_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; c.cd.cl, 2D: the expected output passes the address as one contiguous
; register range (v[2:7]), with two v_mov_b32 copies ahead of the packing.
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_c_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v3
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v4, v4, 16, v0
; GFX10-NEXT:    v_lshl_or_b32 v3, v8, 16, v1
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Intrinsic declarations; all of them use attribute group #1.
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }