; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s

; Register selection for the data operand of the GWS instructions, compiled
; for gfx908 and gfx90a with both the default selector and -global-isel.
;
; The three *_odd_reg functions share one shape: a <2 x i32> shader argument
; is split into its two elements and each element is passed as the first
; operand of the same GWS intrinsic (second operand is the constant 0).
; The expectations ask for one instruction reading v0 on every target, and a
; second one reading v1 on gfx908 but v2 on gfx90a.
; NOTE(review): presumably gfx90a needs an even-numbered data register here,
; which would explain the v1 -> v2 difference; confirm against the ISA docs.

; ds_gws_init with both elements of the argument vector.
; GCN-LABEL: {{^}}gws_init_odd_reg:
; GFX908-DAG: ds_gws_init v1 gds
; GFX90A-DAG: ds_gws_init v2 gds
; GCN-DAG: ds_gws_init v0 gds
define amdgpu_ps void @gws_init_odd_reg(<2 x i32> %arg) {
  %elt.lo = extractelement <2 x i32> %arg, i32 0
  %elt.hi = extractelement <2 x i32> %arg, i32 1
  call void @llvm.amdgcn.ds.gws.init(i32 %elt.lo, i32 0)
  call void @llvm.amdgcn.ds.gws.init(i32 %elt.hi, i32 0)
  ret void
}

; ds_gws_sema_br with both elements of the argument vector.
; GCN-LABEL: {{^}}gws_sema_br_odd_reg:
; GFX908-DAG: ds_gws_sema_br v1 gds
; GFX90A-DAG: ds_gws_sema_br v2 gds
; GCN-DAG: ds_gws_sema_br v0 gds
define amdgpu_ps void @gws_sema_br_odd_reg(<2 x i32> %arg) {
  %elt.lo = extractelement <2 x i32> %arg, i32 0
  %elt.hi = extractelement <2 x i32> %arg, i32 1
  call void @llvm.amdgcn.ds.gws.sema.br(i32 %elt.lo, i32 0)
  call void @llvm.amdgcn.ds.gws.sema.br(i32 %elt.hi, i32 0)
  ret void
}

; ds_gws_barrier with both elements of the argument vector.
; GCN-LABEL: {{^}}gws_barrier_odd_reg:
; GFX908-DAG: ds_gws_barrier v1 gds
; GFX90A-DAG: ds_gws_barrier v2 gds
; GCN-DAG: ds_gws_barrier v0 gds
define amdgpu_ps void @gws_barrier_odd_reg(<2 x i32> %arg) {
  %elt.lo = extractelement <2 x i32> %arg, i32 0
  %elt.hi = extractelement <2 x i32> %arg, i32 1
  call void @llvm.amdgcn.ds.gws.barrier(i32 %elt.lo, i32 0)
  call void @llvm.amdgcn.ds.gws.barrier(i32 %elt.hi, i32 0)
  ret void
}

; Same idea, but the two values handed to ds_gws_init are elements 0 and 1 of
; an MFMA result instead of shader arguments. Two ds_gws_init instructions are
; expected on each target: any v-register on gfx908, and on gfx90a a v- or
; a-register whose number (at most two digits) ends in an even digit.
; GCN-LABEL: {{^}}gws_init_odd_agpr:
; GFX908-COUNT-2: ds_gws_init v{{[0-9]+}} gds
; GFX90A-COUNT-2: ds_gws_init {{[va][0-9]?[02468]}} gds
define amdgpu_ps void @gws_init_odd_agpr(<4 x i32> %arg) {
entry:
  %acc = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %arg, i32 0, i32 0, i32 0)
  %acc.elt0 = extractelement <4 x i32> %acc, i32 0
  %acc.elt1 = extractelement <4 x i32> %acc, i32 1
  call void @llvm.amdgcn.ds.gws.init(i32 %acc.elt0, i32 0)
  call void @llvm.amdgcn.ds.gws.init(i32 %acc.elt1, i32 0)
  ret void
}

; Intrinsics referenced above.
declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
declare void @llvm.amdgcn.ds.gws.init(i32, i32)
declare void @llvm.amdgcn.ds.gws.sema.br(i32, i32)
declare void @llvm.amdgcn.ds.gws.barrier(i32, i32)