; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; Checks AMDGPU code generation for the llvm.amdgcn.image.sample.* intrinsics
; when the coordinates (and, where present, bias / derivatives / clamp) are
; passed as half values. Output is checked for gfx900 and for gfx1010. The
; expected image instructions below carry the a16 modifier, and on gfx1010 the
; derivative (d / cd) variants are expected to use the _g16 opcodes.
;
; The assertion lines are generated. Regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; --- Plain sample: 1d, 2d, 3d, cube, 1darray, 2darray ---

define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; GFX9-LABEL: sample_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: sample_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
; GFX9-LABEL: sample_3d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
; GFX9-LABEL: sample_cube:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cube:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
; GFX9-LABEL: sample_1darray:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_1darray:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
; GFX9-LABEL: sample_2darray:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_2darray:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; --- Depth-compare (c) and LOD-clamp (cl) variants; %zcompare is a float ---

define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
; GFX9-LABEL: sample_c_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: sample_c_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
; GFX9-LABEL: sample_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
; GFX9-LABEL: sample_c_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_mov_b32_e32 v5, v3
; GFX9-NEXT:    v_mov_b32_e32 v3, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; --- LOD-bias (b) variants; %bias is a half ---

define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) {
; GFX9-LABEL: sample_b_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_b_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
; GFX9-LABEL: sample_b_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_b_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) {
; GFX9-LABEL: sample_c_b_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
; GFX9-LABEL: sample_c_b_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) {
; GFX9-LABEL: sample_b_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_b_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_b_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_mov_b32_e32 v5, v3
; GFX9-NEXT:    v_mov_b32_e32 v3, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_b_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) {
; GFX9-LABEL: sample_c_b_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_b_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    v_mov_b32_e32 v7, v4
; GFX9-NEXT:    v_mov_b32_e32 v4, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
; GFX9-NEXT:    v_mov_b32_e32 v5, v1
; GFX9-NEXT:    v_lshl_or_b32 v6, v3, 16, v0
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; --- Explicit-derivative (d) variants; derivatives are half. The expected
; --- output for these has no s_wqm / exec save-restore sequence.

define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_d_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_d_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
; GFX9-NEXT:    v_and_b32_e32 v4, v6, v4
; GFX9-NEXT:    v_and_b32_e32 v2, v6, v2
; GFX9-NEXT:    v_and_b32_e32 v0, v6, v0
; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
; GFX9-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
; GFX9-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
; GFX9-NEXT:    image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
; GFX10-NEXT:    v_and_b32_e32 v4, v6, v4
; GFX10-NEXT:    v_and_b32_e32 v2, v6, v2
; GFX10-NEXT:    v_and_b32_e32 v0, v6, v0
; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
; GFX9-LABEL: sample_d_3d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v12, v8
; GFX9-NEXT:    v_mov_b32_e32 v8, v2
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_mov_b32_e32 v10, v5
; GFX9-NEXT:    v_and_b32_e32 v5, v2, v6
; GFX9-NEXT:    v_and_b32_e32 v3, v2, v3
; GFX9-NEXT:    v_and_b32_e32 v0, v2, v0
; GFX9-NEXT:    v_lshl_or_b32 v11, v7, 16, v5
; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
; GFX9-NEXT:    v_lshl_or_b32 v7, v1, 16, v0
; GFX9-NEXT:    image_sample_d v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_d_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v12, v8
; GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GFX10-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX10-NEXT:    v_mov_b32_e32 v10, v5
; GFX10-NEXT:    v_and_b32_e32 v5, v2, v6
; GFX10-NEXT:    v_and_b32_e32 v3, v2, v3
; GFX10-NEXT:    v_and_b32_e32 v0, v2, v0
; GFX10-NEXT:    v_lshl_or_b32 v11, v7, 16, v5
; GFX10-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
; GFX10-NEXT:    v_lshl_or_b32 v7, v1, 16, v0
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_c_d_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_c_d_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v9, 0xffff
; GFX9-NEXT:    v_mov_b32_e32 v7, v3
; GFX9-NEXT:    v_mov_b32_e32 v8, v2
; GFX9-NEXT:    v_and_b32_e32 v2, v9, v5
; GFX9-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
; GFX9-NEXT:    v_and_b32_e32 v2, v9, v7
; GFX9-NEXT:    v_and_b32_e32 v1, v9, v1
; GFX9-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
; GFX9-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_and_b32_e32 v5, v7, v5
; GFX10-NEXT:    v_and_b32_e32 v3, v7, v3
; GFX10-NEXT:    v_and_b32_e32 v1, v7, v1
; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_d_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_d_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX9-NEXT:    v_and_b32_e32 v4, v7, v4
; GFX9-NEXT:    v_and_b32_e32 v2, v7, v2
; GFX9-NEXT:    v_and_b32_e32 v0, v7, v0
; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
; GFX9-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
; GFX9-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
; GFX9-NEXT:    image_sample_d_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_and_b32_e32 v4, v7, v4
; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_c_d_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_d_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v11, v7
; GFX9-NEXT:    v_mov_b32_e32 v7, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
; GFX9-NEXT:    v_and_b32_e32 v5, v0, v5
; GFX9-NEXT:    v_and_b32_e32 v3, v0, v3
; GFX9-NEXT:    v_and_b32_e32 v0, v0, v1
; GFX9-NEXT:    v_lshl_or_b32 v10, v6, 16, v5
; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
; GFX9-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
; GFX10-NEXT:    v_and_b32_e32 v5, v8, v5
; GFX10-NEXT:    v_and_b32_e32 v3, v8, v3
; GFX10-NEXT:    v_and_b32_e32 v1, v8, v1
; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; --- Derivative (cd) variants ---

define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_cd_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_cd_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
; GFX9-NEXT:    v_and_b32_e32 v4, v6, v4
799; GFX9-NEXT: v_and_b32_e32 v2, v6, v2 800; GFX9-NEXT: v_and_b32_e32 v0, v6, v0 801; GFX9-NEXT: v_lshl_or_b32 v4, v5, 16, v4 802; GFX9-NEXT: v_lshl_or_b32 v3, v3, 16, v2 803; GFX9-NEXT: v_lshl_or_b32 v2, v1, 16, v0 804; GFX9-NEXT: image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16 805; GFX9-NEXT: s_waitcnt vmcnt(0) 806; GFX9-NEXT: ; return to shader part epilog 807; 808; GFX10-LABEL: sample_cd_2d: 809; GFX10: ; %bb.0: ; %main_body 810; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff 811; GFX10-NEXT: v_and_b32_e32 v4, v6, v4 812; GFX10-NEXT: v_and_b32_e32 v2, v6, v2 813; GFX10-NEXT: v_and_b32_e32 v0, v6, v0 814; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 815; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 816; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0 817; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 818; GFX10-NEXT: s_waitcnt vmcnt(0) 819; GFX10-NEXT: ; return to shader part epilog 820main_body: 821 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 822 ret <4 x float> %v 823} 824 825define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { 826; GFX9-LABEL: sample_c_cd_1d: 827; GFX9: ; %bb.0: ; %main_body 828; GFX9-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 829; GFX9-NEXT: s_waitcnt vmcnt(0) 830; GFX9-NEXT: ; return to shader part epilog 831; 832; GFX10-LABEL: sample_c_cd_1d: 833; GFX10: ; %bb.0: ; %main_body 834; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 835; GFX10-NEXT: s_waitcnt vmcnt(0) 836; GFX10-NEXT: ; return to shader part epilog 837main_body: 838 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> 
%samp, i1 0, i32 0, i32 0) 839 ret <4 x float> %v 840} 841 842define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { 843; GFX9-LABEL: sample_c_cd_2d: 844; GFX9: ; %bb.0: ; %main_body 845; GFX9-NEXT: v_mov_b32_e32 v9, 0xffff 846; GFX9-NEXT: v_mov_b32_e32 v7, v3 847; GFX9-NEXT: v_mov_b32_e32 v8, v2 848; GFX9-NEXT: v_and_b32_e32 v2, v9, v5 849; GFX9-NEXT: v_lshl_or_b32 v3, v6, 16, v2 850; GFX9-NEXT: v_and_b32_e32 v2, v9, v7 851; GFX9-NEXT: v_and_b32_e32 v1, v9, v1 852; GFX9-NEXT: v_lshl_or_b32 v2, v4, 16, v2 853; GFX9-NEXT: v_lshl_or_b32 v1, v8, 16, v1 854; GFX9-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 855; GFX9-NEXT: s_waitcnt vmcnt(0) 856; GFX9-NEXT: ; return to shader part epilog 857; 858; GFX10-LABEL: sample_c_cd_2d: 859; GFX10: ; %bb.0: ; %main_body 860; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff 861; GFX10-NEXT: v_and_b32_e32 v5, v7, v5 862; GFX10-NEXT: v_and_b32_e32 v3, v7, v3 863; GFX10-NEXT: v_and_b32_e32 v1, v7, v1 864; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 865; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 866; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 867; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 868; GFX10-NEXT: s_waitcnt vmcnt(0) 869; GFX10-NEXT: ; return to shader part epilog 870main_body: 871 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 872 ret <4 x float> %v 873} 874 875define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { 876; GFX9-LABEL: sample_cd_cl_1d: 877; GFX9: ; %bb.0: ; %main_body 878; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2 879; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2 880; GFX9-NEXT: 
image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 881; GFX9-NEXT: s_waitcnt vmcnt(0) 882; GFX9-NEXT: ; return to shader part epilog 883; 884; GFX10-LABEL: sample_cd_cl_1d: 885; GFX10: ; %bb.0: ; %main_body 886; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 887; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 888; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 889; GFX10-NEXT: s_waitcnt vmcnt(0) 890; GFX10-NEXT: ; return to shader part epilog 891main_body: 892 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 893 ret <4 x float> %v 894} 895 896define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { 897; GFX9-LABEL: sample_cd_cl_2d: 898; GFX9: ; %bb.0: ; %main_body 899; GFX9-NEXT: v_mov_b32_e32 v7, 0xffff 900; GFX9-NEXT: v_and_b32_e32 v4, v7, v4 901; GFX9-NEXT: v_and_b32_e32 v2, v7, v2 902; GFX9-NEXT: v_and_b32_e32 v0, v7, v0 903; GFX9-NEXT: v_lshl_or_b32 v5, v5, 16, v4 904; GFX9-NEXT: v_lshl_or_b32 v4, v3, 16, v2 905; GFX9-NEXT: v_lshl_or_b32 v3, v1, 16, v0 906; GFX9-NEXT: image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16 907; GFX9-NEXT: s_waitcnt vmcnt(0) 908; GFX9-NEXT: ; return to shader part epilog 909; 910; GFX10-LABEL: sample_cd_cl_2d: 911; GFX10: ; %bb.0: ; %main_body 912; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff 913; GFX10-NEXT: v_and_b32_e32 v4, v7, v4 914; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 915; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 916; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 917; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 918; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 919; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 920; GFX10-NEXT: s_waitcnt vmcnt(0) 921; GFX10-NEXT: ; return to 
shader part epilog 922main_body: 923 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 924 ret <4 x float> %v 925} 926 927define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { 928; GFX9-LABEL: sample_c_cd_cl_1d: 929; GFX9: ; %bb.0: ; %main_body 930; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v3 931; GFX9-NEXT: v_lshl_or_b32 v3, v4, 16, v3 932; GFX9-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 933; GFX9-NEXT: s_waitcnt vmcnt(0) 934; GFX9-NEXT: ; return to shader part epilog 935; 936; GFX10-LABEL: sample_c_cd_cl_1d: 937; GFX10: ; %bb.0: ; %main_body 938; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 939; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 940; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 941; GFX10-NEXT: s_waitcnt vmcnt(0) 942; GFX10-NEXT: ; return to shader part epilog 943main_body: 944 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 945 ret <4 x float> %v 946} 947 948define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { 949; GFX9-LABEL: sample_c_cd_cl_2d: 950; GFX9: ; %bb.0: ; %main_body 951; GFX9-NEXT: v_mov_b32_e32 v11, v7 952; GFX9-NEXT: v_mov_b32_e32 v7, v0 953; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff 954; GFX9-NEXT: v_and_b32_e32 v5, v0, v5 955; GFX9-NEXT: v_and_b32_e32 v3, v0, v3 956; GFX9-NEXT: v_and_b32_e32 v0, v0, v1 957; GFX9-NEXT: v_lshl_or_b32 v10, v6, 16, v5 958; GFX9-NEXT: v_lshl_or_b32 v9, v4, 16, v3 959; GFX9-NEXT: v_lshl_or_b32 
v8, v2, 16, v0 960; GFX9-NEXT: image_sample_c_cd_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16 961; GFX9-NEXT: s_waitcnt vmcnt(0) 962; GFX9-NEXT: ; return to shader part epilog 963; 964; GFX10-LABEL: sample_c_cd_cl_2d: 965; GFX10: ; %bb.0: ; %main_body 966; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff 967; GFX10-NEXT: v_and_b32_e32 v5, v8, v5 968; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 969; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 970; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 971; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 972; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 973; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 974; GFX10-NEXT: s_waitcnt vmcnt(0) 975; GFX10-NEXT: ; return to shader part epilog 976main_body: 977 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 978 ret <4 x float> %v 979} 980 981define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) { 982; GFX9-LABEL: sample_l_1d: 983; GFX9: ; %bb.0: ; %main_body 984; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 985; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 986; GFX9-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 987; GFX9-NEXT: s_waitcnt vmcnt(0) 988; GFX9-NEXT: ; return to shader part epilog 989; 990; GFX10-LABEL: sample_l_1d: 991; GFX10: ; %bb.0: ; %main_body 992; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 993; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 994; GFX10-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 995; GFX10-NEXT: s_waitcnt vmcnt(0) 996; GFX10-NEXT: ; return to shader part epilog 997main_body: 998 %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 999 ret <4 x float> %v 
1000} 1001 1002define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) { 1003; GFX9-LABEL: sample_l_2d: 1004; GFX9: ; %bb.0: ; %main_body 1005; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 1006; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0 1007; GFX9-NEXT: image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 1008; GFX9-NEXT: s_waitcnt vmcnt(0) 1009; GFX9-NEXT: ; return to shader part epilog 1010; 1011; GFX10-LABEL: sample_l_2d: 1012; GFX10: ; %bb.0: ; %main_body 1013; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 1014; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0 1015; GFX10-NEXT: image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 1016; GFX10-NEXT: s_waitcnt vmcnt(0) 1017; GFX10-NEXT: ; return to shader part epilog 1018main_body: 1019 %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1020 ret <4 x float> %v 1021} 1022 1023define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) { 1024; GFX9-LABEL: sample_c_l_1d: 1025; GFX9: ; %bb.0: ; %main_body 1026; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 1027; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1 1028; GFX9-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 1029; GFX9-NEXT: s_waitcnt vmcnt(0) 1030; GFX9-NEXT: ; return to shader part epilog 1031; 1032; GFX10-LABEL: sample_c_l_1d: 1033; GFX10: ; %bb.0: ; %main_body 1034; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 1035; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 1036; GFX10-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 1037; GFX10-NEXT: s_waitcnt vmcnt(0) 1038; GFX10-NEXT: ; return to shader part epilog 1039main_body: 1040 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 
0, i32 0) 1041 ret <4 x float> %v 1042} 1043 1044define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) { 1045; GFX9-LABEL: sample_c_l_2d: 1046; GFX9: ; %bb.0: ; %main_body 1047; GFX9-NEXT: v_mov_b32_e32 v5, v3 1048; GFX9-NEXT: v_mov_b32_e32 v3, v0 1049; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v1 1050; GFX9-NEXT: v_lshl_or_b32 v4, v2, 16, v0 1051; GFX9-NEXT: image_sample_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16 1052; GFX9-NEXT: s_waitcnt vmcnt(0) 1053; GFX9-NEXT: ; return to shader part epilog 1054; 1055; GFX10-LABEL: sample_c_l_2d: 1056; GFX10: ; %bb.0: ; %main_body 1057; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 1058; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 1059; GFX10-NEXT: image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 1060; GFX10-NEXT: s_waitcnt vmcnt(0) 1061; GFX10-NEXT: ; return to shader part epilog 1062main_body: 1063 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1064 ret <4 x float> %v 1065} 1066 1067define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { 1068; GFX9-LABEL: sample_lz_1d: 1069; GFX9: ; %bb.0: ; %main_body 1070; GFX9-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 1071; GFX9-NEXT: s_waitcnt vmcnt(0) 1072; GFX9-NEXT: ; return to shader part epilog 1073; 1074; GFX10-LABEL: sample_lz_1d: 1075; GFX10: ; %bb.0: ; %main_body 1076; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 1077; GFX10-NEXT: s_waitcnt vmcnt(0) 1078; GFX10-NEXT: ; return to shader part epilog 1079main_body: 1080 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1081 ret <4 x float> %v 1082} 1083 1084define amdgpu_ps <4 x float> @sample_lz_2d(<8 
x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { 1085; GFX9-LABEL: sample_lz_2d: 1086; GFX9: ; %bb.0: ; %main_body 1087; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 1088; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1089; GFX9-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 1090; GFX9-NEXT: s_waitcnt vmcnt(0) 1091; GFX9-NEXT: ; return to shader part epilog 1092; 1093; GFX10-LABEL: sample_lz_2d: 1094; GFX10: ; %bb.0: ; %main_body 1095; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 1096; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1097; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 1098; GFX10-NEXT: s_waitcnt vmcnt(0) 1099; GFX10-NEXT: ; return to shader part epilog 1100main_body: 1101 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1102 ret <4 x float> %v 1103} 1104 1105define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { 1106; GFX9-LABEL: sample_c_lz_1d: 1107; GFX9: ; %bb.0: ; %main_body 1108; GFX9-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 1109; GFX9-NEXT: s_waitcnt vmcnt(0) 1110; GFX9-NEXT: ; return to shader part epilog 1111; 1112; GFX10-LABEL: sample_c_lz_1d: 1113; GFX10: ; %bb.0: ; %main_body 1114; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 1115; GFX10-NEXT: s_waitcnt vmcnt(0) 1116; GFX10-NEXT: ; return to shader part epilog 1117main_body: 1118 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1119 ret <4 x float> %v 1120} 1121 1122define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { 1123; GFX9-LABEL: sample_c_lz_2d: 1124; GFX9: ; %bb.0: ; %main_body 1125; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, 
v1 1126; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1 1127; GFX9-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 1128; GFX9-NEXT: s_waitcnt vmcnt(0) 1129; GFX9-NEXT: ; return to shader part epilog 1130; 1131; GFX10-LABEL: sample_c_lz_2d: 1132; GFX10: ; %bb.0: ; %main_body 1133; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 1134; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 1135; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 1136; GFX10-NEXT: s_waitcnt vmcnt(0) 1137; GFX10-NEXT: ; return to shader part epilog 1138main_body: 1139 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1140 ret <4 x float> %v 1141} 1142 1143define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { 1144; GFX9-LABEL: sample_c_d_o_2darray_V1: 1145; GFX9: ; %bb.0: ; %main_body 1146; GFX9-NEXT: v_mov_b32_e32 v13, v8 1147; GFX9-NEXT: v_mov_b32_e32 v8, v0 1148; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff 1149; GFX9-NEXT: v_mov_b32_e32 v9, v1 1150; GFX9-NEXT: v_and_b32_e32 v1, v0, v6 1151; GFX9-NEXT: v_lshl_or_b32 v12, v7, 16, v1 1152; GFX9-NEXT: v_and_b32_e32 v1, v0, v4 1153; GFX9-NEXT: v_and_b32_e32 v0, v0, v2 1154; GFX9-NEXT: v_lshl_or_b32 v11, v5, 16, v1 1155; GFX9-NEXT: v_lshl_or_b32 v10, v3, 16, v0 1156; GFX9-NEXT: image_sample_c_d_o v0, v[8:13], s[0:7], s[8:11] dmask:0x4 a16 da 1157; GFX9-NEXT: s_waitcnt vmcnt(0) 1158; GFX9-NEXT: ; return to shader part epilog 1159; 1160; GFX10-LABEL: sample_c_d_o_2darray_V1: 1161; GFX10: ; %bb.0: ; %main_body 1162; GFX10-NEXT: v_mov_b32_e32 v13, v8 1163; GFX10-NEXT: v_mov_b32_e32 v8, v0 1164; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff 1165; GFX10-NEXT: v_mov_b32_e32 v9, v1 1166; GFX10-NEXT: v_and_b32_e32 v1, v0, v6 1167; GFX10-NEXT: v_and_b32_e32 v4, v0, v4 
1168; GFX10-NEXT: v_and_b32_e32 v0, v0, v2 1169; GFX10-NEXT: v_lshl_or_b32 v12, v7, 16, v1 1170; GFX10-NEXT: v_lshl_or_b32 v11, v5, 16, v4 1171; GFX10-NEXT: v_lshl_or_b32 v10, v3, 16, v0 1172; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[8:13], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 1173; GFX10-NEXT: s_waitcnt vmcnt(0) 1174; GFX10-NEXT: ; return to shader part epilog 1175main_body: 1176 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1177 ret float %v 1178} 1179 1180define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { 1181; GFX9-LABEL: sample_c_d_o_2darray_V2: 1182; GFX9: ; %bb.0: ; %main_body 1183; GFX9-NEXT: v_mov_b32_e32 v13, v8 1184; GFX9-NEXT: v_mov_b32_e32 v8, v0 1185; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff 1186; GFX9-NEXT: v_mov_b32_e32 v9, v1 1187; GFX9-NEXT: v_and_b32_e32 v1, v0, v6 1188; GFX9-NEXT: v_lshl_or_b32 v12, v7, 16, v1 1189; GFX9-NEXT: v_and_b32_e32 v1, v0, v4 1190; GFX9-NEXT: v_and_b32_e32 v0, v0, v2 1191; GFX9-NEXT: v_lshl_or_b32 v11, v5, 16, v1 1192; GFX9-NEXT: v_lshl_or_b32 v10, v3, 16, v0 1193; GFX9-NEXT: image_sample_c_d_o v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 a16 da 1194; GFX9-NEXT: s_waitcnt vmcnt(0) 1195; GFX9-NEXT: ; return to shader part epilog 1196; 1197; GFX10-LABEL: sample_c_d_o_2darray_V2: 1198; GFX10: ; %bb.0: ; %main_body 1199; GFX10-NEXT: v_mov_b32_e32 v13, v8 1200; GFX10-NEXT: v_mov_b32_e32 v8, v0 1201; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff 1202; GFX10-NEXT: v_mov_b32_e32 v9, v1 1203; GFX10-NEXT: v_and_b32_e32 v1, v0, v6 1204; GFX10-NEXT: v_and_b32_e32 v4, v0, v4 1205; GFX10-NEXT: v_and_b32_e32 v0, v0, v2 1206; GFX10-NEXT: v_lshl_or_b32 v12, v7, 16, v1 1207; GFX10-NEXT: 
v_lshl_or_b32 v11, v5, 16, v4 1208; GFX10-NEXT: v_lshl_or_b32 v10, v3, 16, v0 1209; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 1210; GFX10-NEXT: s_waitcnt vmcnt(0) 1211; GFX10-NEXT: ; return to shader part epilog 1212main_body: 1213 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1214 ret <2 x float> %v 1215} 1216 1217declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1218declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1219declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1220declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1221declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1222declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1223declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1224 1225declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1226declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1227declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1228declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1229declare <4 x float> 
@llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1230declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1231 1232declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1233declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1234declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1235declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1236declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1237declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1238declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1239declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1240 1241declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1242declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1243declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1244declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1245declare <4 x float> 
@llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1246declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1247declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1248declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1249declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1250 1251declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1252declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1253declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1254declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1255declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1256declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1257declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1258declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1259 1260declare <4 x float> 
@llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1261declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1262declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1263declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1264 1265declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1266declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1267declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1268declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1269 1270declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1271declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1272 1273attributes #0 = { nounwind } 1274attributes #1 = { nounwind readonly } 1275attributes #2 = { nounwind readnone } 1276