; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s

; Passes element 0 and element 1 of the <2 x i32> argument, one per call, to
; llvm.amdgcn.ds.gws.init (second operand is the constant 0 in both calls).
; The checks expect one ds_gws_init on v0 in every run, plus a second one on
; v1 in the gfx908 runs or on v2 in the gfx90a runs. All three are -DAG
; directives, so the relative order of the two instructions is not checked.
; NOTE(review): presumably gfx90a needs the GWS data operand in an
; even-numbered register, hence v2 instead of v1 -- confirm against the
; AMDGPU backend documentation.
; GCN-LABEL: {{^}}gws_init_odd_reg:
; GFX908-DAG: ds_gws_init v1 gds
; GFX90A-DAG: ds_gws_init v2 gds
; GCN-DAG:    ds_gws_init v0 gds
define amdgpu_ps void @gws_init_odd_reg(<2 x i32> %arg) {
  %vgpr.0 = extractelement <2 x i32> %arg, i32 0
  %vgpr.1 = extractelement <2 x i32> %arg, i32 1
  call void @llvm.amdgcn.ds.gws.init(i32 %vgpr.0, i32 0)
  call void @llvm.amdgcn.ds.gws.init(i32 %vgpr.1, i32 0)
  ret void
}

; Passes element 0 and element 1 of the <2 x i32> argument, one per call, to
; llvm.amdgcn.ds.gws.sema.br (second operand is the constant 0 in both calls).
; The checks expect one ds_gws_sema_br on v0 in every run, plus a second one
; on v1 in the gfx908 runs or on v2 in the gfx90a runs. All three are -DAG
; directives, so the relative order of the two instructions is not checked.
; NOTE(review): presumably gfx90a needs the GWS data operand in an
; even-numbered register, hence v2 instead of v1 -- confirm against the
; AMDGPU backend documentation.
; GCN-LABEL: {{^}}gws_sema_br_odd_reg:
; GFX908-DAG: ds_gws_sema_br v1 gds
; GFX90A-DAG: ds_gws_sema_br v2 gds
; GCN-DAG:    ds_gws_sema_br v0 gds
define amdgpu_ps void @gws_sema_br_odd_reg(<2 x i32> %arg) {
  %vgpr.0 = extractelement <2 x i32> %arg, i32 0
  %vgpr.1 = extractelement <2 x i32> %arg, i32 1
  call void @llvm.amdgcn.ds.gws.sema.br(i32 %vgpr.0, i32 0)
  call void @llvm.amdgcn.ds.gws.sema.br(i32 %vgpr.1, i32 0)
  ret void
}

; Passes element 0 and element 1 of the <2 x i32> argument, one per call, to
; llvm.amdgcn.ds.gws.barrier (second operand is the constant 0 in both calls).
; The checks expect one ds_gws_barrier on v0 in every run, plus a second one
; on v1 in the gfx908 runs or on v2 in the gfx90a runs. All three are -DAG
; directives, so the relative order of the two instructions is not checked.
; NOTE(review): presumably gfx90a needs the GWS data operand in an
; even-numbered register, hence v2 instead of v1 -- confirm against the
; AMDGPU backend documentation.
; GCN-LABEL: {{^}}gws_barrier_odd_reg:
; GFX908-DAG: ds_gws_barrier v1 gds
; GFX90A-DAG: ds_gws_barrier v2 gds
; GCN-DAG:    ds_gws_barrier v0 gds
define amdgpu_ps void @gws_barrier_odd_reg(<2 x i32> %arg) {
  %vgpr.0 = extractelement <2 x i32> %arg, i32 0
  %vgpr.1 = extractelement <2 x i32> %arg, i32 1
  call void @llvm.amdgcn.ds.gws.barrier(i32 %vgpr.0, i32 0)
  call void @llvm.amdgcn.ds.gws.barrier(i32 %vgpr.1, i32 0)
  ret void
}

; Feeds elements 0 and 1 of an llvm.amdgcn.mfma.i32.4x4x4i8 result, one per
; call, to llvm.amdgcn.ds.gws.init.
; The gfx908 runs expect two consecutive ds_gws_init matches whose operand is
; any v register. The gfx90a runs expect two consecutive matches whose operand
; is a v or an a register with a one- or two-digit number ending in an even
; digit (that is all the regex below accepts).
; NOTE(review): the %agpr.* names suggest the MFMA result is expected to live
; in AGPRs before being handed to the GWS instruction -- confirm against the
; AMDGPU backend documentation.
; GCN-LABEL: {{^}}gws_init_odd_agpr:
; GFX908-COUNT-2: ds_gws_init v{{[0-9]+}} gds
; GFX90A-COUNT-2: ds_gws_init {{[va][0-9]?[02468]}} gds
define amdgpu_ps void @gws_init_odd_agpr(<4 x i32> %arg) {
bb:
  %mai = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %arg, i32 0, i32 0, i32 0)
  %agpr.0 = extractelement <4 x i32> %mai, i32 0
  %agpr.1 = extractelement <4 x i32> %mai, i32 1
  call void @llvm.amdgcn.ds.gws.init(i32 %agpr.0, i32 0)
  call void @llvm.amdgcn.ds.gws.init(i32 %agpr.1, i32 0)
  ret void
}

; Declarations of the AMDGPU intrinsics called by the test functions in this file.
declare void @llvm.amdgcn.ds.gws.init(i32, i32)
declare void @llvm.amdgcn.ds.gws.sema.br(i32, i32)
declare void @llvm.amdgcn.ds.gws.barrier(i32, i32)
declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)