1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
3; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4
; sample_d_1d: calls llvm.amdgcn.image.sample.d.1d with two half derivative
; operands (%dsdh, %dsdv), one float coordinate (%s) and a dmask of 15.
; The checks expect a single image_sample_d_g16 that reads its address from
; v[0:2], with no other instruction emitted before it.
5define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
6; GFX10-LABEL: sample_d_1d:
7; GFX10:       ; %bb.0: ; %main_body
8; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
9; GFX10-NEXT:    s_waitcnt vmcnt(0)
10; GFX10-NEXT:    ; return to shader part epilog
11main_body:
12  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
13  ret <4 x float> %v
14}
15
; sample_d_2d: 2D variant with four half derivative operands (%dsdh, %dtdh,
; %dsdv, %dtdv) and two float coordinates (%s, %t).
; The checks expect v0 and v2 to be masked with 0xffff and then combined with
; v1 and v3 through v_lshl_or_b32 using a shift of 16, after which
; image_sample_d_g16 takes the register list [v0, v2, v4, v5] as its address.
; NOTE(review): presumably each v_and/v_lshl_or pair packs two half derivatives
; into one 32-bit register - confirm against the ISA documentation.
16define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
17; GFX10-LABEL: sample_d_2d:
18; GFX10:       ; %bb.0: ; %main_body
19; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
20; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
21; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
22; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
23; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
24; GFX10-NEXT:    s_waitcnt vmcnt(0)
25; GFX10-NEXT:    ; return to shader part epilog
26main_body:
27  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
28  ret <4 x float> %v
29}
30
; sample_d_3d: 3D variant with six half derivative operands (%dsdh, %dtdh,
; %drdh, %dsdv, %dtdv, %drdv) and three float coordinates (%s, %t, %r).
; The checks expect two v_mov_b32 copies (v3 into v9, v2 into v3), two
; mask/shift-or pairs that produce v4 and v2, and an image_sample_d_g16 whose
; address is the contiguous range v[2:8] rather than a bracketed register list.
; NOTE(review): the copies presumably exist to make the address operands
; contiguous - confirm; the source only shows the resulting sequence.
31define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
32; GFX10-LABEL: sample_d_3d:
33; GFX10:       ; %bb.0: ; %main_body
34; GFX10-NEXT:    v_mov_b32_e32 v9, v3
35; GFX10-NEXT:    v_mov_b32_e32 v3, v2
36; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
37; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v9
38; GFX10-NEXT:    v_lshl_or_b32 v4, v4, 16, v2
39; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
40; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
41; GFX10-NEXT:    s_waitcnt vmcnt(0)
42; GFX10-NEXT:    ; return to shader part epilog
43main_body:
44  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
45  ret <4 x float> %v
46}
47
; sample_c_d_1d: calls llvm.amdgcn.image.sample.c.d.1d, which takes a float
; %zcompare operand ahead of the two half derivatives and the float coordinate.
; The checks expect a single image_sample_c_d_g16 reading its address from
; v[0:3], with no other instruction emitted before it.
48define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
49; GFX10-LABEL: sample_c_d_1d:
50; GFX10:       ; %bb.0: ; %main_body
51; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
52; GFX10-NEXT:    s_waitcnt vmcnt(0)
53; GFX10-NEXT:    ; return to shader part epilog
54main_body:
55  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
56  ret <4 x float> %v
57}
58
; sample_c_d_2d: 2D variant of the c.d intrinsic: float %zcompare, four half
; derivatives and two float coordinates (%s, %t).
; The checks expect v1 and v3 to be masked with 0xffff and combined with v2 and
; v4 through v_lshl_or_b32 using a shift of 16; image_sample_c_d_g16 then takes
; the register list [v0, v1, v3, v5, v6] as its address.
59define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
60; GFX10-LABEL: sample_c_d_2d:
61; GFX10:       ; %bb.0: ; %main_body
62; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
63; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
64; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
65; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
66; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
67; GFX10-NEXT:    s_waitcnt vmcnt(0)
68; GFX10-NEXT:    ; return to shader part epilog
69main_body:
70  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
71  ret <4 x float> %v
72}
73
; sample_d_cl_1d: calls llvm.amdgcn.image.sample.d.cl.1d, which takes a float
; %clamp operand after the two half derivatives and the float coordinate.
; The checks expect a single image_sample_d_cl_g16 reading its address from
; v[0:3], with no other instruction emitted before it.
74define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
75; GFX10-LABEL: sample_d_cl_1d:
76; GFX10:       ; %bb.0: ; %main_body
77; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
78; GFX10-NEXT:    s_waitcnt vmcnt(0)
79; GFX10-NEXT:    ; return to shader part epilog
80main_body:
81  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
82  ret <4 x float> %v
83}
84
; sample_d_cl_2d: 2D variant of the d.cl intrinsic: four half derivatives, two
; float coordinates (%s, %t) and a trailing float %clamp.
; The checks expect v0 and v2 to be masked with 0xffff and combined with v1 and
; v3 through v_lshl_or_b32 using a shift of 16; image_sample_d_cl_g16 then
; takes the register list [v0, v2, v4, v5, v6] as its address.
85define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
86; GFX10-LABEL: sample_d_cl_2d:
87; GFX10:       ; %bb.0: ; %main_body
88; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
89; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
90; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
91; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
92; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
93; GFX10-NEXT:    s_waitcnt vmcnt(0)
94; GFX10-NEXT:    ; return to shader part epilog
95main_body:
96  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
97  ret <4 x float> %v
98}
99
; sample_c_d_cl_1d: calls llvm.amdgcn.image.sample.c.d.cl.1d with a leading
; float %zcompare, two half derivatives, a float coordinate and a trailing
; float %clamp.
; The checks expect a single image_sample_c_d_cl_g16 reading its address from
; v[0:4], with no other instruction emitted before it.
100define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
101; GFX10-LABEL: sample_c_d_cl_1d:
102; GFX10:       ; %bb.0: ; %main_body
103; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
104; GFX10-NEXT:    s_waitcnt vmcnt(0)
105; GFX10-NEXT:    ; return to shader part epilog
106main_body:
107  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
108  ret <4 x float> %v
109}
110
; sample_c_d_cl_2d: 2D variant of the c.d.cl intrinsic: float %zcompare, four
; half derivatives, two float coordinates (%s, %t) and a float %clamp.
; The checks expect two v_mov_b32 copies (v2 into v8, v0 into v2), two
; mask/shift-or pairs that produce v4 and v3, and an image_sample_c_d_cl_g16
; whose address is the contiguous range v[2:7] rather than a bracketed list.
; NOTE(review): the copies presumably exist to make the address operands
; contiguous - confirm; the source only shows the resulting sequence.
111define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
112; GFX10-LABEL: sample_c_d_cl_2d:
113; GFX10:       ; %bb.0: ; %main_body
114; GFX10-NEXT:    v_mov_b32_e32 v8, v2
115; GFX10-NEXT:    v_mov_b32_e32 v2, v0
116; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v3
117; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
118; GFX10-NEXT:    v_lshl_or_b32 v4, v4, 16, v0
119; GFX10-NEXT:    v_lshl_or_b32 v3, v8, 16, v1
120; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
121; GFX10-NEXT:    s_waitcnt vmcnt(0)
122; GFX10-NEXT:    ; return to shader part epilog
123main_body:
124  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
125  ret <4 x float> %v
126}
127
; sample_c_d_o_2darray_V1: calls the c.d.o 2darray intrinsic with an i32
; %offset, float %zcompare, four half derivatives and three float operands
; (%s, %t, %slice). The function returns a single float and passes a dmask of 4.
; The checks expect four v_mov_b32 copies, two mask/shift-or pairs that produce
; v5 and v4, and an image_sample_c_d_o_g16 that writes only v0, uses dmask:0x4
; and reads its address from the contiguous range v[2:8].
; NOTE(review): the intrinsic name used here ends in ".f16.f32.f32", which does
; not follow the "<return>.f16.f32" spelling of the other calls in this file
; (e.g. ".v2f32.f16.f32"). The call and its declaration agree with each other;
; confirm whether this spelling is intentional before changing either one.
128define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
129; GFX10-LABEL: sample_c_d_o_2darray_V1:
130; GFX10:       ; %bb.0: ; %main_body
131; GFX10-NEXT:    v_mov_b32_e32 v9, v2
132; GFX10-NEXT:    v_mov_b32_e32 v10, v3
133; GFX10-NEXT:    v_mov_b32_e32 v3, v1
134; GFX10-NEXT:    v_mov_b32_e32 v2, v0
135; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v4
136; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v9
137; GFX10-NEXT:    v_lshl_or_b32 v5, v5, 16, v0
138; GFX10-NEXT:    v_lshl_or_b32 v4, v10, 16, v1
139; GFX10-NEXT:    image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
140; GFX10-NEXT:    s_waitcnt vmcnt(0)
141; GFX10-NEXT:    ; return to shader part epilog
142main_body:
143  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
144  ret float %v
145}
146
; sample_c_d_o_2darray_V2: same operand list as sample_c_d_o_2darray_V1, but
; the function returns <2 x float> and passes a dmask of 6.
; The checks expect the same copy and mask/shift-or sequence, followed by an
; image_sample_c_d_o_g16 that writes v[0:1], uses dmask:0x6 and reads its
; address from the contiguous range v[2:8].
147define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
148; GFX10-LABEL: sample_c_d_o_2darray_V2:
149; GFX10:       ; %bb.0: ; %main_body
150; GFX10-NEXT:    v_mov_b32_e32 v9, v2
151; GFX10-NEXT:    v_mov_b32_e32 v10, v3
152; GFX10-NEXT:    v_mov_b32_e32 v3, v1
153; GFX10-NEXT:    v_mov_b32_e32 v2, v0
154; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v4
155; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v9
156; GFX10-NEXT:    v_lshl_or_b32 v5, v5, 16, v0
157; GFX10-NEXT:    v_lshl_or_b32 v4, v10, 16, v1
158; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
159; GFX10-NEXT:    s_waitcnt vmcnt(0)
160; GFX10-NEXT:    ; return to shader part epilog
161main_body:
162  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
163  ret <2 x float> %v
164}
165
; Declarations of the image sample intrinsics called by the test functions in
; this file. Every declaration carries attribute group #1. The first nine
; return <4 x float>; the last two are the c.d.o 2darray forms returning float
; and <2 x float>.
166declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
167declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
168declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
169declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
170declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
171declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
172declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
173declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
174declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
175
176declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
177declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
178
; Attribute groups. #1 is the group attached to the intrinsic declarations.
; NOTE(review): no line visible in this file references #0 or #2 - confirm
; whether they are still needed.
179attributes #0 = { nounwind }
180attributes #1 = { nounwind readonly }
181attributes #2 = { nounwind readnone }
182