; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; Instruction-selection checks for the llvm.amdgcn.image.sample.cd* intrinsics
; (1d and 2d; plain, .c, .cl and .c.cl forms) called with half-typed operands
; (only %zcompare is float). The gfx900 run expects the plain image_sample_*cd*
; opcodes with the a16 modifier; the gfx1010 run expects the *_g16 opcodes with
; a dim operand and the a16 modifier. Where several half operands are present
; the checks show them being packed pairwise into 32-bit VGPRs with
; v_and_b32 / v_lshl_or_b32 before the image instruction.

; image.sample.cd.1d with half operands %dsdh, %dsdv, %s.
define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_cd_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; image.sample.cd.2d with half operands %dsdh, %dtdh, %dsdv, %dtdv, %s, %t.
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_cd_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
; GFX9-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
; GFX9-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
; GFX9-NEXT:    image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; image.sample.c.cd.1d with float %zcompare and half operands %dsdh, %dsdv, %s.
define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_c_cd_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; image.sample.c.cd.2d with float %zcompare and half operands
; %dsdh, %dtdh, %dsdv, %dtdv, %s, %t.
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_c_cd_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v7, v3
; GFX9-NEXT:    v_mov_b32_e32 v8, v2
; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v5
; GFX9-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v7
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
; GFX9-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v5, 0xffff, v5
; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; image.sample.cd.cl.1d with half operands %dsdh, %dsdv, %s, %clamp.
define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_cd_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; image.sample.cd.cl.2d with half operands
; %dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp.
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_cd_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
; GFX9-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
; GFX9-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; image.sample.c.cd.cl.1d with float %zcompare and half operands
; %dsdh, %dsdv, %s, %clamp.
define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_c_cd_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; image.sample.c.cd.cl.2d with float %zcompare and half operands
; %dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp.
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_cd_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v11, v7
; GFX9-NEXT:    v_mov_b32_e32 v7, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v5
; GFX9-NEXT:    v_lshl_or_b32 v10, v6, 16, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v3
; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v5, 0xffff, v5
; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Intrinsic declarations used by the tests above.
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Only #1 is referenced in this file (by the declarations above).
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }