; Tests how @llvm.amdgcn.wavefrontsize() is handled by code generation (llc)
; and by the O3 IR pipeline (opt).
;
; Check prefixes used below:
;   GCN      - every llc invocation.
;   W32, W64 - llc invocations grouped by the wavefront size they select.
;   OPT      - every opt invocation.
;   OPT-W32, OPT-W64 - opt invocations whose -mcpu/-mattr select a wavefront
;              size; the checks expect the intrinsic to become a constant.
;   OPT-WXX  - opt invocations that do not select a wavefront size; the checks
;              expect the intrinsic call to remain in the output.
;
; The llc invocations name the target with -mtriple, as the opt invocations
; already do, instead of -march. -march only replaces the architecture part of
; the host's default triple, which makes the effective triple host-dependent.

; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s

; RUN: opt -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s

; Case 1: the intrinsic result is stored directly. The stored value is expected
; to be the literal 32 or 64 when the wavefront size is known, and the
; unfolded call result otherwise.

; GCN-LABEL: {{^}}fold_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(

; W32:         v_mov_b32_e32 [[V:v[0-9]+]], 32
; W64:         v_mov_b32_e32 [[V:v[0-9]+]], 64
; GCN:         store_{{dword|b32}} v{{.+}}, [[V]]

; OPT-W32:     store i32 32, i32 addrspace(1)* %arg, align 4
; OPT-W64:     store i32 64, i32 addrspace(1)* %arg, align 4
; OPT-WXX:     %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
; OPT-WXX:     store i32 %tmp, i32 addrspace(1)* %arg, align 4
; OPT-NEXT:    ret void

define amdgpu_kernel void @fold_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
  store i32 %tmp, i32 addrspace(1)* %arg, align 4
  ret void
}

; Case 2: the intrinsic result feeds a compare + select. With a known wavefront
; size the whole chain is expected to collapse to the selected constant
; (1 for a size of 32, 2 for a size of 64) with no conditional move left in the
; generated code.

; GCN-LABEL: {{^}}fold_and_optimize_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize(

; W32:         v_mov_b32_e32 [[V:v[0-9]+]], 1{{$}}
; W64:         v_mov_b32_e32 [[V:v[0-9]+]], 2{{$}}
; GCN-NOT:     cndmask
; GCN:         store_{{dword|b32}} v{{.+}}, [[V]]

; OPT-W32:     store i32 1, i32 addrspace(1)* %arg, align 4
; OPT-W64:     store i32 2, i32 addrspace(1)* %arg, align 4
; OPT-WXX:     %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
; OPT-WXX:     %tmp1 = icmp ugt i32 %tmp, 32
; OPT-WXX:     %tmp2 = select i1 %tmp1, i32 2, i32 1
; OPT-WXX:     store i32 %tmp2, i32 addrspace(1)* %arg
; OPT-NEXT:    ret void

define amdgpu_kernel void @fold_and_optimize_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
  %tmp1 = icmp ugt i32 %tmp, 32
  %tmp2 = select i1 %tmp1, i32 2, i32 1
  store i32 %tmp2, i32 addrspace(1)* %arg
  ret void
}

; Case 3: the intrinsic result feeds a compare + conditional branch around a
; store. For the opt runs with a wavefront size of 32 only the shared checks
; apply, so `ret void` must directly follow `bb:` (no store survives); with a
; size of 64 the store is expected right before the return. Only the label is
; checked for the llc runs.

; GCN-LABEL: {{^}}fold_and_optimize_if_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(

; OPT:         bb:
; OPT-WXX:     %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
; OPT-WXX:     %tmp1 = icmp ugt i32 %tmp, 32
; OPT-WXX:     bb3:
; OPT-W64:     store i32 1, i32 addrspace(1)* %arg, align 4
; OPT-NEXT:    ret void

define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
  %tmp1 = icmp ugt i32 %tmp, 32
  br i1 %tmp1, label %bb2, label %bb3

bb2:                                              ; preds = %bb
  store i32 1, i32 addrspace(1)* %arg, align 4
  br label %bb3

bb3:                                              ; preds = %bb2, %bb
  ret void
}

declare i32 @llvm.amdgcn.wavefrontsize() #0

attributes #0 = { nounwind readnone speculatable }