1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4
; sample_cd_1d: calls llvm.amdgcn.image.sample.cd.1d with a first operand of 15
; (printed as dmask:0xf in the expected assembly) and the half-typed operands
; %dsdh, %dsdv and %s.
; Expected output: the gfx900 run emits image_sample_cd with the a16 modifier;
; the gfx1010 run emits image_sample_cd_g16 with dim:SQ_RSRC_IMG_1D and a16.
; Neither run expects any packing instructions ahead of the sample.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
5define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
6; GFX9-LABEL: sample_cd_1d:
7; GFX9:       ; %bb.0: ; %main_body
8; GFX9-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
9; GFX9-NEXT:    s_waitcnt vmcnt(0)
10; GFX9-NEXT:    ; return to shader part epilog
11;
12; GFX10-LABEL: sample_cd_1d:
13; GFX10:       ; %bb.0: ; %main_body
14; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
15; GFX10-NEXT:    s_waitcnt vmcnt(0)
16; GFX10-NEXT:    ; return to shader part epilog
17main_body:
18  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
19  ret <4 x float> %v
20}
21
; sample_cd_2d: calls llvm.amdgcn.image.sample.cd.2d with six half-typed
; operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t) and a first operand of 15.
; Expected output on both targets: three v_and_b32 (mask 0xffff) /
; v_lshl_or_b32 (shift 16) pairs combine v0..v5 two at a time into v2..v4,
; which is then used as the contiguous address range v[2:4].
; NOTE(review): presumably v0..v5 hold the six half arguments in declaration
; order - confirm against the amdgpu_ps calling convention.
; gfx900 emits image_sample_cd; gfx1010 emits image_sample_cd_g16 with
; dim:SQ_RSRC_IMG_2D. Both carry the a16 modifier.
22define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
23; GFX9-LABEL: sample_cd_2d:
24; GFX9:       ; %bb.0: ; %main_body
25; GFX9-NEXT:    v_and_b32_e32 v4, 0xffff, v4
26; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
27; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
28; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
29; GFX9-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
30; GFX9-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
31; GFX9-NEXT:    image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
32; GFX9-NEXT:    s_waitcnt vmcnt(0)
33; GFX9-NEXT:    ; return to shader part epilog
34;
35; GFX10-LABEL: sample_cd_2d:
36; GFX10:       ; %bb.0: ; %main_body
37; GFX10-NEXT:    v_and_b32_e32 v4, 0xffff, v4
38; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
39; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
40; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
41; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
42; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
43; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
44; GFX10-NEXT:    s_waitcnt vmcnt(0)
45; GFX10-NEXT:    ; return to shader part epilog
46main_body:
47  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
48  ret <4 x float> %v
49}
50
; sample_c_cd_1d: calls llvm.amdgcn.image.sample.c.cd.1d with a float
; %zcompare operand followed by the half-typed operands %dsdh, %dsdv and %s.
; Expected output: no packing instructions; the sample reads the contiguous
; address range v[0:3]. gfx900 emits image_sample_c_cd; gfx1010 emits
; image_sample_c_cd_g16 with dim:SQ_RSRC_IMG_1D. Both carry the a16 modifier.
51define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
52; GFX9-LABEL: sample_c_cd_1d:
53; GFX9:       ; %bb.0: ; %main_body
54; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
55; GFX9-NEXT:    s_waitcnt vmcnt(0)
56; GFX9-NEXT:    ; return to shader part epilog
57;
58; GFX10-LABEL: sample_c_cd_1d:
59; GFX10:       ; %bb.0: ; %main_body
60; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
61; GFX10-NEXT:    s_waitcnt vmcnt(0)
62; GFX10-NEXT:    ; return to shader part epilog
63main_body:
64  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
65  ret <4 x float> %v
66}
67
; sample_c_cd_2d: calls llvm.amdgcn.image.sample.c.cd.2d with a float
; %zcompare operand followed by six half-typed operands
; (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t).
; Expected output differs per target:
;  - gfx900 first copies v3 and v2 aside with v_mov_b32, then uses three
;    v_and_b32 (mask 0xffff) / v_lshl_or_b32 (shift 16) pairs to build v1..v3,
;    so the sample can read the contiguous range v[0:3].
;  - gfx1010 packs in place into v1, v3 and v5 and passes the bracketed
;    register list [v0, v1, v3, v5] instead of a contiguous range.
; v0 is left untouched by the packing code on both targets.
; NOTE(review): presumably v0 carries %zcompare - confirm against the
; amdgpu_ps calling convention.
68define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
69; GFX9-LABEL: sample_c_cd_2d:
70; GFX9:       ; %bb.0: ; %main_body
71; GFX9-NEXT:    v_mov_b32_e32 v7, v3
72; GFX9-NEXT:    v_mov_b32_e32 v8, v2
73; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v5
74; GFX9-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
75; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v7
76; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
77; GFX9-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
78; GFX9-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
79; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
80; GFX9-NEXT:    s_waitcnt vmcnt(0)
81; GFX9-NEXT:    ; return to shader part epilog
82;
83; GFX10-LABEL: sample_c_cd_2d:
84; GFX10:       ; %bb.0: ; %main_body
85; GFX10-NEXT:    v_and_b32_e32 v5, 0xffff, v5
86; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
87; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
88; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
89; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
90; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
91; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
92; GFX10-NEXT:    s_waitcnt vmcnt(0)
93; GFX10-NEXT:    ; return to shader part epilog
94main_body:
95  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
96  ret <4 x float> %v
97}
98
; sample_cd_cl_1d: calls llvm.amdgcn.image.sample.cd.cl.1d with four
; half-typed operands (%dsdh, %dsdv, %s, %clamp).
; Expected output on both targets: a single v_and_b32 (mask 0xffff) /
; v_lshl_or_b32 (shift 16) pair merges v2 and v3 into v2; v0 and v1 are used
; as-is, giving the contiguous address range v[0:2].
; NOTE(review): presumably v2/v3 hold %s/%clamp - confirm against the
; amdgpu_ps calling convention.
; gfx900 emits image_sample_cd_cl; gfx1010 emits image_sample_cd_cl_g16 with
; dim:SQ_RSRC_IMG_1D. Both carry the a16 modifier.
99define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
100; GFX9-LABEL: sample_cd_cl_1d:
101; GFX9:       ; %bb.0: ; %main_body
102; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
103; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
104; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
105; GFX9-NEXT:    s_waitcnt vmcnt(0)
106; GFX9-NEXT:    ; return to shader part epilog
107;
108; GFX10-LABEL: sample_cd_cl_1d:
109; GFX10:       ; %bb.0: ; %main_body
110; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
111; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
112; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
113; GFX10-NEXT:    s_waitcnt vmcnt(0)
114; GFX10-NEXT:    ; return to shader part epilog
115main_body:
116  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
117  ret <4 x float> %v
118}
119
; sample_cd_cl_2d: calls llvm.amdgcn.image.sample.cd.cl.2d with seven
; half-typed operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp).
; Expected output differs per target:
;  - gfx900 uses three v_and_b32 (mask 0xffff) / v_lshl_or_b32 (shift 16)
;    pairs to build v3..v5 from v0..v5, then reads the contiguous range v[3:6].
;  - gfx1010 packs in place into v0, v2 and v4 and passes the bracketed
;    register list [v0, v2, v4, v6] instead of a contiguous range.
; v6 is consumed without any packing instruction on both targets.
; NOTE(review): presumably v6 holds the unpaired %clamp operand - confirm
; against the amdgpu_ps calling convention.
120define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
121; GFX9-LABEL: sample_cd_cl_2d:
122; GFX9:       ; %bb.0: ; %main_body
123; GFX9-NEXT:    v_and_b32_e32 v4, 0xffff, v4
124; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
125; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
126; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
127; GFX9-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
128; GFX9-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
129; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
130; GFX9-NEXT:    s_waitcnt vmcnt(0)
131; GFX9-NEXT:    ; return to shader part epilog
132;
133; GFX10-LABEL: sample_cd_cl_2d:
134; GFX10:       ; %bb.0: ; %main_body
135; GFX10-NEXT:    v_and_b32_e32 v4, 0xffff, v4
136; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
137; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
138; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
139; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
140; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
141; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
142; GFX10-NEXT:    s_waitcnt vmcnt(0)
143; GFX10-NEXT:    ; return to shader part epilog
144main_body:
145  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
146  ret <4 x float> %v
147}
148
; sample_c_cd_cl_1d: calls llvm.amdgcn.image.sample.c.cd.cl.1d with a float
; %zcompare operand followed by four half-typed operands
; (%dsdh, %dsdv, %s, %clamp).
; Expected output on both targets: a single v_and_b32 (mask 0xffff) /
; v_lshl_or_b32 (shift 16) pair merges v3 and v4 into v3; v0..v2 are used
; as-is, giving the contiguous address range v[0:3].
; NOTE(review): presumably v3/v4 hold %s/%clamp - confirm against the
; amdgpu_ps calling convention.
; gfx900 emits image_sample_c_cd_cl; gfx1010 emits image_sample_c_cd_cl_g16
; with dim:SQ_RSRC_IMG_1D. Both carry the a16 modifier.
149define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
150; GFX9-LABEL: sample_c_cd_cl_1d:
151; GFX9:       ; %bb.0: ; %main_body
152; GFX9-NEXT:    v_and_b32_e32 v3, 0xffff, v3
153; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
154; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
155; GFX9-NEXT:    s_waitcnt vmcnt(0)
156; GFX9-NEXT:    ; return to shader part epilog
157;
158; GFX10-LABEL: sample_c_cd_cl_1d:
159; GFX10:       ; %bb.0: ; %main_body
160; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
161; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
162; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
163; GFX10-NEXT:    s_waitcnt vmcnt(0)
164; GFX10-NEXT:    ; return to shader part epilog
165main_body:
166  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
167  ret <4 x float> %v
168}
169
; sample_c_cd_cl_2d: calls llvm.amdgcn.image.sample.c.cd.cl.2d with a float
; %zcompare operand followed by seven half-typed operands
; (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp).
; Expected output differs per target:
;  - gfx900 copies v7 to v11 and v0 to v7 with v_mov_b32, then uses three
;    v_and_b32 (mask 0xffff) / v_lshl_or_b32 (shift 16) pairs to build
;    v8..v10 from v1..v6, so the sample can read the contiguous range v[7:11].
;  - gfx1010 packs in place into v1, v3 and v5 and passes the bracketed
;    register list [v0, v1, v3, v5, v7] instead of a contiguous range.
; NOTE(review): presumably v0 holds %zcompare and v7 holds %clamp - confirm
; against the amdgpu_ps calling convention.
170define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
171; GFX9-LABEL: sample_c_cd_cl_2d:
172; GFX9:       ; %bb.0: ; %main_body
173; GFX9-NEXT:    v_mov_b32_e32 v11, v7
174; GFX9-NEXT:    v_mov_b32_e32 v7, v0
175; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v5
176; GFX9-NEXT:    v_lshl_or_b32 v10, v6, 16, v0
177; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v3
178; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v0
179; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
180; GFX9-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
181; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
182; GFX9-NEXT:    s_waitcnt vmcnt(0)
183; GFX9-NEXT:    ; return to shader part epilog
184;
185; GFX10-LABEL: sample_c_cd_cl_2d:
186; GFX10:       ; %bb.0: ; %main_body
187; GFX10-NEXT:    v_and_b32_e32 v5, 0xffff, v5
188; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
189; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
190; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
191; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
192; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
193; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
194; GFX10-NEXT:    s_waitcnt vmcnt(0)
195; GFX10-NEXT:    ; return to shader part epilog
196main_body:
197  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
198  ret <4 x float> %v
199}
200
; Declarations of the eight image-sample intrinsics called by the test
; functions in this file: cd, c.cd, cd.cl and c.cd.cl, each in a 1d and a 2d
; form. All return <4 x float> and all carry attribute group #1.
; The "c" variants take a leading float operand after the i32; every other
; operand before the <8 x i32> resource is half.
; NOTE(review): the trailing i1, i32, i32 operands are passed as zero by every
; caller here; presumably unorm / texfailctrl / cachepolicy - confirm against
; the AMDGPU image intrinsic definitions.
201declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
202declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
203declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
204declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
205declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
206declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
207declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
208declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
209
; Attribute groups. Only #1 (nounwind readonly) is referenced in the visible
; part of this file, by the intrinsic declarations.
; NOTE(review): #0 and #2 have no visible users here; confirm they are unused
; before removing them.
210attributes #0 = { nounwind }
211attributes #1 = { nounwind readonly }
212attributes #2 = { nounwind readnone }
213