1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s 3; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s 4; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s 5 6define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { 7; GFX9-LABEL: sample_1d: 8; GFX9: ; %bb.0: ; %main_body 9; GFX9-NEXT: s_mov_b64 s[12:13], exec 10; GFX9-NEXT: s_wqm_b64 exec, exec 11; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 12; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 13; GFX9-NEXT: s_waitcnt vmcnt(0) 14; GFX9-NEXT: ; return to shader part epilog 15; 16; GFX10-LABEL: sample_1d: 17; GFX10: ; %bb.0: ; %main_body 18; GFX10-NEXT: s_mov_b32 s12, exec_lo 19; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 20; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 21; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 22; GFX10-NEXT: s_waitcnt vmcnt(0) 23; GFX10-NEXT: ; return to shader part epilog 24main_body: 25 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 26 ret <4 x float> %v 27} 28 29define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { 30; GFX9-LABEL: sample_2d: 31; GFX9: ; %bb.0: ; %main_body 32; GFX9-NEXT: s_mov_b64 s[12:13], exec 33; GFX9-NEXT: s_wqm_b64 exec, exec 34; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 35; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 36; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 37; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 38; GFX9-NEXT: s_waitcnt vmcnt(0) 39; GFX9-NEXT: ; return to shader part epilog 40; 41; GFX10-LABEL: sample_2d: 42; GFX10: ; %bb.0: ; 
%main_body 43; GFX10-NEXT: s_mov_b32 s12, exec_lo 44; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 45; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 46; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 47; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 48; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 49; GFX10-NEXT: s_waitcnt vmcnt(0) 50; GFX10-NEXT: ; return to shader part epilog 51main_body: 52 %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 53 ret <4 x float> %v 54} 55 56define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) { 57; GFX9-LABEL: sample_3d: 58; GFX9: ; %bb.0: ; %main_body 59; GFX9-NEXT: s_mov_b64 s[12:13], exec 60; GFX9-NEXT: s_wqm_b64 exec, exec 61; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 62; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0 63; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 64; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 65; GFX9-NEXT: s_waitcnt vmcnt(0) 66; GFX9-NEXT: ; return to shader part epilog 67; 68; GFX10-LABEL: sample_3d: 69; GFX10: ; %bb.0: ; %main_body 70; GFX10-NEXT: s_mov_b32 s12, exec_lo 71; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 72; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 73; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0 74; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 75; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 76; GFX10-NEXT: s_waitcnt vmcnt(0) 77; GFX10-NEXT: ; return to shader part epilog 78main_body: 79 %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 80 ret <4 x float> %v 81} 82 83define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) { 84; GFX9-LABEL: sample_cube: 85; GFX9: ; %bb.0: ; %main_body 86; GFX9-NEXT: s_mov_b64 
s[12:13], exec 87; GFX9-NEXT: s_wqm_b64 exec, exec 88; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 89; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0 90; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 91; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da 92; GFX9-NEXT: s_waitcnt vmcnt(0) 93; GFX9-NEXT: ; return to shader part epilog 94; 95; GFX10-LABEL: sample_cube: 96; GFX10: ; %bb.0: ; %main_body 97; GFX10-NEXT: s_mov_b32 s12, exec_lo 98; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 99; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 100; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0 101; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 102; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16 103; GFX10-NEXT: s_waitcnt vmcnt(0) 104; GFX10-NEXT: ; return to shader part epilog 105main_body: 106 %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 107 ret <4 x float> %v 108} 109 110define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) { 111; GFX9-LABEL: sample_1darray: 112; GFX9: ; %bb.0: ; %main_body 113; GFX9-NEXT: s_mov_b64 s[12:13], exec 114; GFX9-NEXT: s_wqm_b64 exec, exec 115; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 116; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 117; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 118; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da 119; GFX9-NEXT: s_waitcnt vmcnt(0) 120; GFX9-NEXT: ; return to shader part epilog 121; 122; GFX10-LABEL: sample_1darray: 123; GFX10: ; %bb.0: ; %main_body 124; GFX10-NEXT: s_mov_b32 s12, exec_lo 125; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 126; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 127; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 128; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 129; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16 130; GFX10-NEXT: s_waitcnt 
vmcnt(0) 131; GFX10-NEXT: ; return to shader part epilog 132main_body: 133 %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 134 ret <4 x float> %v 135} 136 137define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) { 138; GFX9-LABEL: sample_2darray: 139; GFX9: ; %bb.0: ; %main_body 140; GFX9-NEXT: s_mov_b64 s[12:13], exec 141; GFX9-NEXT: s_wqm_b64 exec, exec 142; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 143; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0 144; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 145; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da 146; GFX9-NEXT: s_waitcnt vmcnt(0) 147; GFX9-NEXT: ; return to shader part epilog 148; 149; GFX10-LABEL: sample_2darray: 150; GFX10: ; %bb.0: ; %main_body 151; GFX10-NEXT: s_mov_b32 s12, exec_lo 152; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 153; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 154; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0 155; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 156; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16 157; GFX10-NEXT: s_waitcnt vmcnt(0) 158; GFX10-NEXT: ; return to shader part epilog 159main_body: 160 %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 161 ret <4 x float> %v 162} 163 164define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { 165; GFX9-LABEL: sample_c_1d: 166; GFX9: ; %bb.0: ; %main_body 167; GFX9-NEXT: s_mov_b64 s[12:13], exec 168; GFX9-NEXT: s_wqm_b64 exec, exec 169; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 170; GFX9-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 171; GFX9-NEXT: s_waitcnt vmcnt(0) 172; GFX9-NEXT: ; return to shader part epilog 173; 174; 
GFX10-LABEL: sample_c_1d: 175; GFX10: ; %bb.0: ; %main_body 176; GFX10-NEXT: s_mov_b32 s12, exec_lo 177; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 178; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 179; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 180; GFX10-NEXT: s_waitcnt vmcnt(0) 181; GFX10-NEXT: ; return to shader part epilog 182main_body: 183 %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 184 ret <4 x float> %v 185} 186 187define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { 188; GFX9-LABEL: sample_c_2d: 189; GFX9: ; %bb.0: ; %main_body 190; GFX9-NEXT: s_mov_b64 s[12:13], exec 191; GFX9-NEXT: s_wqm_b64 exec, exec 192; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 193; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1 194; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 195; GFX9-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 196; GFX9-NEXT: s_waitcnt vmcnt(0) 197; GFX9-NEXT: ; return to shader part epilog 198; 199; GFX10-LABEL: sample_c_2d: 200; GFX10: ; %bb.0: ; %main_body 201; GFX10-NEXT: s_mov_b32 s12, exec_lo 202; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 203; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 204; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 205; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 206; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 207; GFX10-NEXT: s_waitcnt vmcnt(0) 208; GFX10-NEXT: ; return to shader part epilog 209main_body: 210 %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 211 ret <4 x float> %v 212} 213 214define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) { 215; GFX9-LABEL: sample_cl_1d: 216; GFX9: ; %bb.0: ; 
%main_body 217; GFX9-NEXT: s_mov_b64 s[12:13], exec 218; GFX9-NEXT: s_wqm_b64 exec, exec 219; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 220; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 221; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 222; GFX9-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 223; GFX9-NEXT: s_waitcnt vmcnt(0) 224; GFX9-NEXT: ; return to shader part epilog 225; 226; GFX10-LABEL: sample_cl_1d: 227; GFX10: ; %bb.0: ; %main_body 228; GFX10-NEXT: s_mov_b32 s12, exec_lo 229; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 230; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 231; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 232; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 233; GFX10-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 234; GFX10-NEXT: s_waitcnt vmcnt(0) 235; GFX10-NEXT: ; return to shader part epilog 236main_body: 237 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 238 ret <4 x float> %v 239} 240 241define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) { 242; GFX9-LABEL: sample_cl_2d: 243; GFX9: ; %bb.0: ; %main_body 244; GFX9-NEXT: s_mov_b64 s[12:13], exec 245; GFX9-NEXT: s_wqm_b64 exec, exec 246; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 247; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0 248; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 249; GFX9-NEXT: image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 250; GFX9-NEXT: s_waitcnt vmcnt(0) 251; GFX9-NEXT: ; return to shader part epilog 252; 253; GFX10-LABEL: sample_cl_2d: 254; GFX10: ; %bb.0: ; %main_body 255; GFX10-NEXT: s_mov_b32 s12, exec_lo 256; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 257; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 258; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0 259; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 260; GFX10-NEXT: image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf 
dim:SQ_RSRC_IMG_2D a16 261; GFX10-NEXT: s_waitcnt vmcnt(0) 262; GFX10-NEXT: ; return to shader part epilog 263main_body: 264 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 265 ret <4 x float> %v 266} 267 268define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) { 269; GFX9-LABEL: sample_c_cl_1d: 270; GFX9: ; %bb.0: ; %main_body 271; GFX9-NEXT: s_mov_b64 s[12:13], exec 272; GFX9-NEXT: s_wqm_b64 exec, exec 273; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 274; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1 275; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 276; GFX9-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 277; GFX9-NEXT: s_waitcnt vmcnt(0) 278; GFX9-NEXT: ; return to shader part epilog 279; 280; GFX10-LABEL: sample_c_cl_1d: 281; GFX10: ; %bb.0: ; %main_body 282; GFX10-NEXT: s_mov_b32 s12, exec_lo 283; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 284; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 285; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 286; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 287; GFX10-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 288; GFX10-NEXT: s_waitcnt vmcnt(0) 289; GFX10-NEXT: ; return to shader part epilog 290main_body: 291 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 292 ret <4 x float> %v 293} 294 295define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) { 296; GFX9-LABEL: sample_c_cl_2d: 297; GFX9: ; %bb.0: ; %main_body 298; GFX9-NEXT: s_mov_b64 s[12:13], exec 299; GFX9-NEXT: s_wqm_b64 exec, exec 300; GFX9-NEXT: v_mov_b32_e32 v5, v3 301; GFX9-NEXT: v_mov_b32_e32 v3, v0 302; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v1 
303; GFX9-NEXT: v_lshl_or_b32 v4, v2, 16, v0 304; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 305; GFX9-NEXT: image_sample_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16 306; GFX9-NEXT: s_waitcnt vmcnt(0) 307; GFX9-NEXT: ; return to shader part epilog 308; 309; GFX10-LABEL: sample_c_cl_2d: 310; GFX10: ; %bb.0: ; %main_body 311; GFX10-NEXT: s_mov_b32 s12, exec_lo 312; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 313; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 314; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 315; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 316; GFX10-NEXT: image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 317; GFX10-NEXT: s_waitcnt vmcnt(0) 318; GFX10-NEXT: ; return to shader part epilog 319main_body: 320 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 321 ret <4 x float> %v 322} 323 324define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) { 325; GFX9-LABEL: sample_b_1d: 326; GFX9: ; %bb.0: ; %main_body 327; GFX9-NEXT: s_mov_b64 s[12:13], exec 328; GFX9-NEXT: s_wqm_b64 exec, exec 329; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 330; GFX9-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 331; GFX9-NEXT: s_waitcnt vmcnt(0) 332; GFX9-NEXT: ; return to shader part epilog 333; 334; GFX10-LABEL: sample_b_1d: 335; GFX10: ; %bb.0: ; %main_body 336; GFX10-NEXT: s_mov_b32 s12, exec_lo 337; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 338; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 339; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 340; GFX10-NEXT: s_waitcnt vmcnt(0) 341; GFX10-NEXT: ; return to shader part epilog 342main_body: 343 %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 344 ret <4 x 
float> %v 345} 346 347define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) { 348; GFX9-LABEL: sample_b_2d: 349; GFX9: ; %bb.0: ; %main_body 350; GFX9-NEXT: s_mov_b64 s[12:13], exec 351; GFX9-NEXT: s_wqm_b64 exec, exec 352; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 353; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1 354; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 355; GFX9-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 356; GFX9-NEXT: s_waitcnt vmcnt(0) 357; GFX9-NEXT: ; return to shader part epilog 358; 359; GFX10-LABEL: sample_b_2d: 360; GFX10: ; %bb.0: ; %main_body 361; GFX10-NEXT: s_mov_b32 s12, exec_lo 362; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 363; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 364; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 365; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 366; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 367; GFX10-NEXT: s_waitcnt vmcnt(0) 368; GFX10-NEXT: ; return to shader part epilog 369main_body: 370 %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 371 ret <4 x float> %v 372} 373 374define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) { 375; GFX9-LABEL: sample_c_b_1d: 376; GFX9: ; %bb.0: ; %main_body 377; GFX9-NEXT: s_mov_b64 s[12:13], exec 378; GFX9-NEXT: s_wqm_b64 exec, exec 379; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 380; GFX9-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 381; GFX9-NEXT: s_waitcnt vmcnt(0) 382; GFX9-NEXT: ; return to shader part epilog 383; 384; GFX10-LABEL: sample_c_b_1d: 385; GFX10: ; %bb.0: ; %main_body 386; GFX10-NEXT: s_mov_b32 s12, exec_lo 387; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 388; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 389; GFX10-NEXT: image_sample_c_b v[0:3], 
v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 390; GFX10-NEXT: s_waitcnt vmcnt(0) 391; GFX10-NEXT: ; return to shader part epilog 392main_body: 393 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 394 ret <4 x float> %v 395} 396 397define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) { 398; GFX9-LABEL: sample_c_b_2d: 399; GFX9: ; %bb.0: ; %main_body 400; GFX9-NEXT: s_mov_b64 s[12:13], exec 401; GFX9-NEXT: s_wqm_b64 exec, exec 402; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2 403; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2 404; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 405; GFX9-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 406; GFX9-NEXT: s_waitcnt vmcnt(0) 407; GFX9-NEXT: ; return to shader part epilog 408; 409; GFX10-LABEL: sample_c_b_2d: 410; GFX10: ; %bb.0: ; %main_body 411; GFX10-NEXT: s_mov_b32 s12, exec_lo 412; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 413; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 414; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 415; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 416; GFX10-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 417; GFX10-NEXT: s_waitcnt vmcnt(0) 418; GFX10-NEXT: ; return to shader part epilog 419main_body: 420 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 421 ret <4 x float> %v 422} 423 424define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) { 425; GFX9-LABEL: sample_b_cl_1d: 426; GFX9: ; %bb.0: ; %main_body 427; GFX9-NEXT: s_mov_b64 s[12:13], exec 428; GFX9-NEXT: s_wqm_b64 exec, exec 429; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 430; GFX9-NEXT: v_lshl_or_b32 
v1, v2, 16, v1 431; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 432; GFX9-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 433; GFX9-NEXT: s_waitcnt vmcnt(0) 434; GFX9-NEXT: ; return to shader part epilog 435; 436; GFX10-LABEL: sample_b_cl_1d: 437; GFX10: ; %bb.0: ; %main_body 438; GFX10-NEXT: s_mov_b32 s12, exec_lo 439; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 440; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 441; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 442; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 443; GFX10-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 444; GFX10-NEXT: s_waitcnt vmcnt(0) 445; GFX10-NEXT: ; return to shader part epilog 446main_body: 447 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 448 ret <4 x float> %v 449} 450 451define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) { 452; GFX9-LABEL: sample_b_cl_2d: 453; GFX9: ; %bb.0: ; %main_body 454; GFX9-NEXT: s_mov_b64 s[12:13], exec 455; GFX9-NEXT: s_wqm_b64 exec, exec 456; GFX9-NEXT: v_mov_b32_e32 v5, v3 457; GFX9-NEXT: v_mov_b32_e32 v3, v0 458; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v1 459; GFX9-NEXT: v_lshl_or_b32 v4, v2, 16, v0 460; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 461; GFX9-NEXT: image_sample_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16 462; GFX9-NEXT: s_waitcnt vmcnt(0) 463; GFX9-NEXT: ; return to shader part epilog 464; 465; GFX10-LABEL: sample_b_cl_2d: 466; GFX10: ; %bb.0: ; %main_body 467; GFX10-NEXT: s_mov_b32 s12, exec_lo 468; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 469; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 470; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 471; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 472; GFX10-NEXT: image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 473; GFX10-NEXT: 
s_waitcnt vmcnt(0) 474; GFX10-NEXT: ; return to shader part epilog 475main_body: 476 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 477 ret <4 x float> %v 478} 479 480define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) { 481; GFX9-LABEL: sample_c_b_cl_1d: 482; GFX9: ; %bb.0: ; %main_body 483; GFX9-NEXT: s_mov_b64 s[12:13], exec 484; GFX9-NEXT: s_wqm_b64 exec, exec 485; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2 486; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2 487; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 488; GFX9-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 489; GFX9-NEXT: s_waitcnt vmcnt(0) 490; GFX9-NEXT: ; return to shader part epilog 491; 492; GFX10-LABEL: sample_c_b_cl_1d: 493; GFX10: ; %bb.0: ; %main_body 494; GFX10-NEXT: s_mov_b32 s12, exec_lo 495; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 496; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 497; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 498; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 499; GFX10-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 500; GFX10-NEXT: s_waitcnt vmcnt(0) 501; GFX10-NEXT: ; return to shader part epilog 502main_body: 503 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 504 ret <4 x float> %v 505} 506 507define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) { 508; GFX9-LABEL: sample_c_b_cl_2d: 509; GFX9: ; %bb.0: ; %main_body 510; GFX9-NEXT: s_mov_b64 s[12:13], exec 511; GFX9-NEXT: s_wqm_b64 exec, exec 512; GFX9-NEXT: v_mov_b32_e32 v7, v4 513; GFX9-NEXT: v_mov_b32_e32 v4, v0 514; 
GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v2 515; GFX9-NEXT: v_mov_b32_e32 v5, v1 516; GFX9-NEXT: v_lshl_or_b32 v6, v3, 16, v0 517; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] 518; GFX9-NEXT: image_sample_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0xf a16 519; GFX9-NEXT: s_waitcnt vmcnt(0) 520; GFX9-NEXT: ; return to shader part epilog 521; 522; GFX10-LABEL: sample_c_b_cl_2d: 523; GFX10: ; %bb.0: ; %main_body 524; GFX10-NEXT: s_mov_b32 s12, exec_lo 525; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 526; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 527; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 528; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 529; GFX10-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 530; GFX10-NEXT: s_waitcnt vmcnt(0) 531; GFX10-NEXT: ; return to shader part epilog 532main_body: 533 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 534 ret <4 x float> %v 535} 536 537define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { 538; GFX9-LABEL: sample_d_1d: 539; GFX9: ; %bb.0: ; %main_body 540; GFX9-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 541; GFX9-NEXT: s_waitcnt vmcnt(0) 542; GFX9-NEXT: ; return to shader part epilog 543; 544; GFX10-LABEL: sample_d_1d: 545; GFX10: ; %bb.0: ; %main_body 546; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 547; GFX10-NEXT: s_waitcnt vmcnt(0) 548; GFX10-NEXT: ; return to shader part epilog 549main_body: 550 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 551 ret <4 x float> %v 552} 553 554define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half 
%dtdh, half %dsdv, half %dtdv, half %s, half %t) { 555; GFX9-LABEL: sample_d_2d: 556; GFX9: ; %bb.0: ; %main_body 557; GFX9-NEXT: v_and_b32_e32 v4, 0xffff, v4 558; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2 559; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 560; GFX9-NEXT: v_lshl_or_b32 v4, v5, 16, v4 561; GFX9-NEXT: v_lshl_or_b32 v3, v3, 16, v2 562; GFX9-NEXT: v_lshl_or_b32 v2, v1, 16, v0 563; GFX9-NEXT: image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16 564; GFX9-NEXT: s_waitcnt vmcnt(0) 565; GFX9-NEXT: ; return to shader part epilog 566; 567; GFX10-LABEL: sample_d_2d: 568; GFX10: ; %bb.0: ; %main_body 569; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 570; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 571; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 572; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 573; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 574; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0 575; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 576; GFX10-NEXT: s_waitcnt vmcnt(0) 577; GFX10-NEXT: ; return to shader part epilog 578main_body: 579 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 580 ret <4 x float> %v 581} 582 583define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) { 584; GFX9-LABEL: sample_d_3d: 585; GFX9: ; %bb.0: ; %main_body 586; GFX9-NEXT: v_mov_b32_e32 v12, v8 587; GFX9-NEXT: v_mov_b32_e32 v8, v2 588; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v6 589; GFX9-NEXT: v_lshl_or_b32 v11, v7, 16, v2 590; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v3 591; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 592; GFX9-NEXT: v_mov_b32_e32 v10, v5 593; GFX9-NEXT: v_lshl_or_b32 v9, v4, 16, v2 594; GFX9-NEXT: v_lshl_or_b32 v7, v1, 16, v0 595; GFX9-NEXT: image_sample_d v[0:3], 
v[7:12], s[0:7], s[8:11] dmask:0xf a16 596; GFX9-NEXT: s_waitcnt vmcnt(0) 597; GFX9-NEXT: ; return to shader part epilog 598; 599; GFX10-LABEL: sample_d_3d: 600; GFX10: ; %bb.0: ; %main_body 601; GFX10-NEXT: v_mov_b32_e32 v12, v8 602; GFX10-NEXT: v_mov_b32_e32 v8, v2 603; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v6 604; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 605; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 606; GFX10-NEXT: v_mov_b32_e32 v10, v5 607; GFX10-NEXT: v_lshl_or_b32 v11, v7, 16, v2 608; GFX10-NEXT: v_lshl_or_b32 v9, v4, 16, v3 609; GFX10-NEXT: v_lshl_or_b32 v7, v1, 16, v0 610; GFX10-NEXT: image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 611; GFX10-NEXT: s_waitcnt vmcnt(0) 612; GFX10-NEXT: ; return to shader part epilog 613main_body: 614 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 615 ret <4 x float> %v 616} 617 618define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { 619; GFX9-LABEL: sample_c_d_1d: 620; GFX9: ; %bb.0: ; %main_body 621; GFX9-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 622; GFX9-NEXT: s_waitcnt vmcnt(0) 623; GFX9-NEXT: ; return to shader part epilog 624; 625; GFX10-LABEL: sample_c_d_1d: 626; GFX10: ; %bb.0: ; %main_body 627; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 628; GFX10-NEXT: s_waitcnt vmcnt(0) 629; GFX10-NEXT: ; return to shader part epilog 630main_body: 631 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 632 ret <4 x float> %v 633} 634 635define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg 
%samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { 636; GFX9-LABEL: sample_c_d_2d: 637; GFX9: ; %bb.0: ; %main_body 638; GFX9-NEXT: v_mov_b32_e32 v7, v3 639; GFX9-NEXT: v_mov_b32_e32 v8, v2 640; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v5 641; GFX9-NEXT: v_lshl_or_b32 v3, v6, 16, v2 642; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v7 643; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 644; GFX9-NEXT: v_lshl_or_b32 v2, v4, 16, v2 645; GFX9-NEXT: v_lshl_or_b32 v1, v8, 16, v1 646; GFX9-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 647; GFX9-NEXT: s_waitcnt vmcnt(0) 648; GFX9-NEXT: ; return to shader part epilog 649; 650; GFX10-LABEL: sample_c_d_2d: 651; GFX10: ; %bb.0: ; %main_body 652; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 653; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 654; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 655; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 656; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 657; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 658; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 659; GFX10-NEXT: s_waitcnt vmcnt(0) 660; GFX10-NEXT: ; return to shader part epilog 661main_body: 662 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 663 ret <4 x float> %v 664} 665 666define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { 667; GFX9-LABEL: sample_d_cl_1d: 668; GFX9: ; %bb.0: ; %main_body 669; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2 670; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2 671; GFX9-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 672; GFX9-NEXT: s_waitcnt vmcnt(0) 673; GFX9-NEXT: ; return to shader part epilog 674; 675; GFX10-LABEL: sample_d_cl_1d: 676; GFX10: ; %bb.0: ; 
%main_body 677; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 678; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 679; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 680; GFX10-NEXT: s_waitcnt vmcnt(0) 681; GFX10-NEXT: ; return to shader part epilog 682main_body: 683 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 684 ret <4 x float> %v 685} 686 687define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { 688; GFX9-LABEL: sample_d_cl_2d: 689; GFX9: ; %bb.0: ; %main_body 690; GFX9-NEXT: v_and_b32_e32 v4, 0xffff, v4 691; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2 692; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 693; GFX9-NEXT: v_lshl_or_b32 v5, v5, 16, v4 694; GFX9-NEXT: v_lshl_or_b32 v4, v3, 16, v2 695; GFX9-NEXT: v_lshl_or_b32 v3, v1, 16, v0 696; GFX9-NEXT: image_sample_d_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16 697; GFX9-NEXT: s_waitcnt vmcnt(0) 698; GFX9-NEXT: ; return to shader part epilog 699; 700; GFX10-LABEL: sample_d_cl_2d: 701; GFX10: ; %bb.0: ; %main_body 702; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 703; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 704; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 705; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 706; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 707; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 708; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 709; GFX10-NEXT: s_waitcnt vmcnt(0) 710; GFX10-NEXT: ; return to shader part epilog 711main_body: 712 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 713 ret <4 x float> %v 714} 715 
; Naming note for the c.d.* calls in this section: the type suffix of these
; intrinsics encodes <return>.<gradient>.<coordinate>; the float zcompare
; operand does not take part (compare c.b.* and c.l.* in the declarations).
; With half gradients and half coordinates the suffix is therefore .f16.f16,
; the same as the d.cl.* and the V1 c.d.o.2darray declarations.

; sample_c_d_cl_1d: compare + gradient + clamp, 1D. The assertions expect a
; single pack of v3/v4 (presumably s and clamp) and an address range v[0:3].
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_c_d_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_c_d_cl_2d: compare + gradient + clamp, 2D. Three packed pairs are
; expected; gfx9 additionally copies operands into a contiguous v[7:11] range,
; while the gfx10+ runs use a non-contiguous address list instead.
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_d_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v11, v7
; GFX9-NEXT:    v_mov_b32_e32 v7, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v5
; GFX9-NEXT:    v_lshl_or_b32 v10, v6, 16, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v3
; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v5, 0xffff, v5
; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_l_1d: explicit-LOD sample, 1D. The two half operands are expected to
; be packed into a single address dword (v0).
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
; GFX9-LABEL: sample_l_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_l_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_l_2d: explicit-LOD sample, 2D. One packed pair is expected in v1 and
; the address range is v[1:2], so v2 is consumed without any packing.
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
; GFX9-LABEL: sample_l_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_l_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
; GFX10-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_c_l_1d: compare + explicit LOD, 1D. v0 is expected to stay untouched
; (presumably the float zcompare) and v1/v2 to be packed into v1.
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
; GFX9-LABEL: sample_c_l_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_c_l_2d: compare + explicit LOD, 2D. gfx9 is expected to copy operands
; into a contiguous v[3:5] range; the gfx10+ runs use the list [v0, v1, v3].
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
; GFX9-LABEL: sample_c_l_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v5, v3
; GFX9-NEXT:    v_mov_b32_e32 v3, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
; GFX9-NEXT:    image_sample_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_lz_1d: LOD-zero sample, 1D. A single half coordinate: no packing
; instructions are expected before the image instruction.
define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; GFX9-LABEL: sample_lz_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_lz_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_lz_2d: LOD-zero sample, 2D. The two half coordinates are expected to
; be packed into v0.
define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: sample_lz_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_lz_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_c_lz_1d: compare + LOD-zero, 1D. No packing is expected; the address
; range is v[0:1].
define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
; GFX9-LABEL: sample_c_lz_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_lz_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_c_lz_2d: compare + LOD-zero, 2D. v1/v2 are expected to be packed into
; v1, leaving v0 untouched ahead of it.
define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: sample_c_lz_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_lz_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_c_d_o_2darray_V1: compare + gradient + offset on a 2D array, scalar
; float result. The call passes dmask 4 and the assertions expect dmask:0x4
; with a single result register (v0).
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; GFX9-LABEL: sample_c_d_o_2darray_V1:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v13, v8
; GFX9-NEXT:    v_mov_b32_e32 v8, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v6
; GFX9-NEXT:    v_lshl_or_b32 v12, v7, 16, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v4
; GFX9-NEXT:    v_lshl_or_b32 v11, v5, 16, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
; GFX9-NEXT:    v_mov_b32_e32 v9, v1
; GFX9-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
; GFX9-NEXT:    image_sample_c_d_o v0, v[8:13], s[0:7], s[8:11] dmask:0x4 a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V1:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v13, v8
; GFX10-NEXT:    v_mov_b32_e32 v9, v1
; GFX10-NEXT:    v_mov_b32_e32 v8, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v6
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v4
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v12, v7, 16, v0
; GFX10-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
; GFX10-NEXT:    v_lshl_or_b32 v10, v3, 16, v2
; GFX10-NEXT:    image_sample_c_d_o_g16 v0, v[8:13], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}

; sample_c_d_o_2darray_V2: same operands as the V1 variant but with a
; <2 x float> result. The call passes dmask 6 and the assertions expect
; dmask:0x6 with a two-register result (v[0:1]).
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; GFX9-LABEL: sample_c_d_o_2darray_V2:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v13, v8
; GFX9-NEXT:    v_mov_b32_e32 v8, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v6
; GFX9-NEXT:    v_lshl_or_b32 v12, v7, 16, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v4
; GFX9-NEXT:    v_lshl_or_b32 v11, v5, 16, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
; GFX9-NEXT:    v_mov_b32_e32 v9, v1
; GFX9-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
; GFX9-NEXT:    image_sample_c_d_o v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V2:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v13, v8
; GFX10-NEXT:    v_mov_b32_e32 v9, v1
; GFX10-NEXT:    v_mov_b32_e32 v8, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v6
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v4
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v12, v7, 16, v0
; GFX10-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
; GFX10-NEXT:    v_lshl_or_b32 v10, v3, 16, v2
; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}

; Intrinsic declarations, grouped by sample variant. Every declaration uses
; attribute group #1.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; NOTE(review): the next two names also carry .f32.f16 although their gradient
; operands are half. Their call sites are outside this section, so they are
; left untouched here; rename call and declaration together.
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Attribute groups. Only #1 is referenced in this section; #0 and #2 are kept
; because uses may exist elsewhere in the file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }