; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX908 %s
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s

; Every test below loads a <32 x float> value, passes it as the vector operand
; of llvm.amdgcn.mfma.f32.32x32x1f32 and stores the result back to the same
; pointer. The checks look only at the register kind of the first and the last
; operand of the emitted v_mfma_f32_32x32x1 instruction: a[...] or v[...].
;
; The gfx908 run expects the a[...] form in every test. The gfx90a and gfx940
; runs (with and without -global-isel) share one check prefix and expect the
; v[...] form in the two tests that say so explicitly.

declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)

; Kernel with attribute group #0 and nothing but the load, the MFMA and the
; store: the gfx90a/gfx940 runs expect the v[...] form here.
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_vgpr:
; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vgpr(<32 x float> addrspace(1)* %arg) #0 {
bb:
  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
  ret void
}

; Same body as the previous test, but with attribute group #1 (no
; "amdgpu-waves-per-eu"): every run expects the a[...] form.
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_agpr:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x1f32_agpr(<32 x float> addrspace(1)* %arg) #1 {
bb:
  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
  ret void
}

; Inline asm defines a0 before the MFMA: every run expects the a[...] form.
; NOTE(review): despite "virtual" in the test name, the constraint {a0} names
; a specific register - confirm this is intended.
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_inline_asm_virtual_agpr:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_virtual_agpr(<32 x float> addrspace(1)* %arg) #0 {
bb:
  %acc = call i32 asm sideeffect "; def $0", "={a0}"()
  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
  ret void
}

; Inline asm takes an input in a[100:131] before the MFMA: every run expects
; the a[...] form.
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_inline_asm_phys_agpr:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_phys_agpr(<32 x float> addrspace(1)* %arg) #0 {
bb:
  call void asm sideeffect "; use $0", "{a[100:131]}"(<32 x float> undef)
  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
  ret void
}

; Inline asm is present but only defines v0: the expectations are the same as
; in the first test, i.e. v[...] on the gfx90a/gfx940 runs.
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_inline_asm_no_agprs:
; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_no_agprs(<32 x float> addrspace(1)* %arg) #0 {
bb:
  %acc = call i32 asm sideeffect "; def $0", "={v0}"()
  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
  ret void
}

; The kernel calls the external function @foo before the MFMA: every run
; expects the a[...] form.
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_call:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call(<32 x float> addrspace(1)* %arg) #0 {
bb:
  call void @foo()
  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
  ret void
}

; We could avoid the scan to find calls, since we see them during lowering,
; before selection. However, in SelectionDAG, lowering and selection are done
; block by block, so this would only work in GlobalISel.

; The call to @foo sits in a later block (%bb2) than the MFMA (%bb1): every
; run still expects the a[...] form.
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_call_multi_bb:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(<32 x float> addrspace(1)* %arg) #0 {
bb1:
  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3)
  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
  br i1 undef, label %bb2, label %bb3

bb2:
  call void @foo()
  br label %bb3

bb3:
  ret void
}

; Plain function (no amdgpu_kernel calling convention): every run expects the
; a[...] form.
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
define void @test_mfma_f32_32x32x1f32_nonentry(<32 x float> addrspace(1)* %arg) #0 {
bb:
  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
  ret void
}

declare void @foo()

; Both groups set the flat work-group size to "1,256"; only #0 also sets
; "amdgpu-waves-per-eu" (to "2").
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }